diff --git a/.buildkite/pipelines/periodic-fwc.template.yml b/.buildkite/pipelines/periodic-fwc.template.yml index b9f00a649a14b..2ce3b6673543c 100644 --- a/.buildkite/pipelines/periodic-fwc.template.yml +++ b/.buildkite/pipelines/periodic-fwc.template.yml @@ -7,7 +7,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - preemptible: true matrix: setup: FWC_VERSION: $FWC_LIST diff --git a/.buildkite/pipelines/periodic-fwc.yml b/.buildkite/pipelines/periodic-fwc.yml index 434a091aa1dfe..1d47546474e47 100644 --- a/.buildkite/pipelines/periodic-fwc.yml +++ b/.buildkite/pipelines/periodic-fwc.yml @@ -1,16 +1,15 @@ # This file is auto-generated. See .buildkite/pipelines/periodic-fwc.template.yml steps: - - label: "{{matrix.FWC_VERSION}} / fwc" - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v$$FWC_VERSION#fwcTest -Dtests.bwc.snapshot=false + - label: $FWC_VERSION / fwc + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v$FWC_VERSION#fwcTest -Dtests.bwc.snapshot=false timeout_in_minutes: 300 agents: provider: gcp image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - preemptible: true matrix: setup: FWC_VERSION: [] env: - FWC_VERSION: "{{matrix.FWC_VERSION}}" + FWC_VERSION: $FWC_VERSION diff --git a/build-tools-internal/src/main/groovy/elasticsearch.build-scan.gradle b/build-tools-internal/src/main/groovy/elasticsearch.build-scan.gradle index 8702f5a9bf0e9..4113e1c1c9d20 100644 --- a/build-tools-internal/src/main/groovy/elasticsearch.build-scan.gradle +++ b/build-tools-internal/src/main/groovy/elasticsearch.build-scan.gradle @@ -133,6 +133,9 @@ develocity { } } else { tag 'LOCAL' + if (providers.systemProperty('idea.active').present) { + tag 'IDEA' + } } } } diff --git a/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle b/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle index c491d74c589c2..5ed55ab0d5a03 100644 --- a/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle +++ b/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle @@ -179,7 +179,7 @@ if (providers.systemProperty('idea.active').getOrNull() == 'true') { // this path is produced by the extractLibs task above String testLibraryPath = TestUtil.getTestLibraryPath("${elasticsearchProject.left()}/libs/native/libraries/build/platform") - + def enableIdeaCC = providers.gradleProperty("org.elasticsearch.idea-configuration-cache").getOrElse("true").toBoolean() idea { project { vcs = 'Git' @@ -209,6 +209,11 @@ if (providers.systemProperty('idea.active').getOrNull() == 'true') { } } runConfigurations { + defaults(org.jetbrains.gradle.ext.Gradle) { + scriptParameters = enableIdeaCC ? 
[ + '--configuration-cache' + ].join(' ') : '' + } defaults(JUnit) { vmParameters = [ '-ea', diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/MrjarPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/MrjarPlugin.java index b835bae815d07..ed5e4c9eb5102 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/MrjarPlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/MrjarPlugin.java @@ -139,7 +139,7 @@ private SourceSet addSourceSet( compileOptions.getRelease().set(javaVersion); }); if (isMainSourceSet) { - project.getTasks().create(sourceSet.getJavadocTaskName(), Javadoc.class, javadocTask -> { + project.getTasks().register(sourceSet.getJavadocTaskName(), Javadoc.class, javadocTask -> { javadocTask.getJavadocTool().set(javaToolchains.javadocToolFor(spec -> { spec.getLanguageVersion().set(JavaLanguageVersion.of(javaVersion)); })); diff --git a/distribution/tools/java-version-checker/build.gradle b/distribution/tools/java-version-checker/build.gradle index 3d4ec5aced29c..e702699fbb392 100644 --- a/distribution/tools/java-version-checker/build.gradle +++ b/distribution/tools/java-version-checker/build.gradle @@ -1,6 +1,6 @@ apply plugin: 'elasticsearch.build' -compileJava { +tasks.named("compileJava").configure { options.release = 8 } diff --git a/docs/build.gradle b/docs/build.gradle index 505bf2fb1ddfb..0ebb4b498eabd 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -37,6 +37,7 @@ ext.docsFileTree = fileTree(projectDir) { } tasks.named("yamlRestTest") { + enabled = false if (buildParams.isSnapshotBuild() == false) { // LOOKUP is not available in snapshots systemProperty 'tests.rest.blacklist', [ @@ -47,6 +48,7 @@ tasks.named("yamlRestTest") { /* List of files that have snippets that will not work until platinum tests can occur ... 
*/ tasks.named("buildRestTests").configure { + enabled = false getExpectedUnconvertedCandidates().addAll( 'reference/ml/anomaly-detection/ml-configuring-transform.asciidoc', 'reference/ml/anomaly-detection/apis/delete-calendar-event.asciidoc', diff --git a/docs/changelog/123569.yaml b/docs/changelog/123569.yaml new file mode 100644 index 0000000000000..c04601eead9f4 --- /dev/null +++ b/docs/changelog/123569.yaml @@ -0,0 +1,5 @@ +pr: 123569 +summary: Abort pending deletion on `IndicesService` close +area: Store +type: enhancement +issues: [] diff --git a/docs/changelog/123610.yaml b/docs/changelog/123610.yaml new file mode 100644 index 0000000000000..628d832f903dc --- /dev/null +++ b/docs/changelog/123610.yaml @@ -0,0 +1,5 @@ +pr: 123610 +summary: Disable concurrency when `top_hits` sorts on anything but `_score` +area: "Aggregations" +type: bug +issues: [] diff --git a/docs/docset.yml b/docs/docset.yml new file mode 100644 index 0000000000000..53e946a907e7f --- /dev/null +++ b/docs/docset.yml @@ -0,0 +1,506 @@ +project: 'Elasticsearch' +exclude: + - internal/* + - reference/esql/functions/kibana/docs/* + - reference/esql/functions/README.md +cross_links: + - beats + - cloud + - docs-content + - ecs + - eland + - elasticsearch-hadoop + - elasticsearch-java + - elasticsearch-js + - elasticsearch-net + - elasticsearch-php + - elasticsearch-py + - elasticsearch-rs + - elasticsearch-ruby + - go-elasticsearch + - kibana + - logstash +toc: + - toc: reference + - toc: release-notes + - toc: extend +subs: + ref: "https://www.elastic.co/guide/en/elasticsearch/reference/current" + ref-bare: "https://www.elastic.co/guide/en/elasticsearch/reference" + ref-8x: "https://www.elastic.co/guide/en/elasticsearch/reference/8.1" + ref-80: "https://www.elastic.co/guide/en/elasticsearch/reference/8.0" + ref-7x: "https://www.elastic.co/guide/en/elasticsearch/reference/7.17" + ref-70: "https://www.elastic.co/guide/en/elasticsearch/reference/7.0" + ref-60: "https://www.elastic.co/guide/en/elasticsearch/reference/6.0" + ref-64: "https://www.elastic.co/guide/en/elasticsearch/reference/6.4" + xpack-ref: "https://www.elastic.co/guide/en/x-pack/6.2" + logstash-ref: "https://www.elastic.co/guide/en/logstash/current" + kibana-ref: "https://www.elastic.co/guide/en/kibana/current" + kibana-ref-all: "https://www.elastic.co/guide/en/kibana" + beats-ref-root: "https://www.elastic.co/guide/en/beats" + beats-ref: "https://www.elastic.co/guide/en/beats/libbeat/current" + beats-ref-60: "https://www.elastic.co/guide/en/beats/libbeat/6.0" + beats-ref-63: "https://www.elastic.co/guide/en/beats/libbeat/6.3" + beats-devguide: "https://www.elastic.co/guide/en/beats/devguide/current" + auditbeat-ref: "https://www.elastic.co/guide/en/beats/auditbeat/current" + packetbeat-ref: "https://www.elastic.co/guide/en/beats/packetbeat/current" + metricbeat-ref: "https://www.elastic.co/guide/en/beats/metricbeat/current" + filebeat-ref: "https://www.elastic.co/guide/en/beats/filebeat/current" + functionbeat-ref: "https://www.elastic.co/guide/en/beats/functionbeat/current" + winlogbeat-ref: "https://www.elastic.co/guide/en/beats/winlogbeat/current" + heartbeat-ref: "https://www.elastic.co/guide/en/beats/heartbeat/current" + journalbeat-ref: "https://www.elastic.co/guide/en/beats/journalbeat/current" + ingest-guide: "https://www.elastic.co/guide/en/ingest/current" + fleet-guide: "https://www.elastic.co/guide/en/fleet/current" + apm-guide-ref: "https://www.elastic.co/guide/en/apm/guide/current" + apm-guide-7x: 
"https://www.elastic.co/guide/en/apm/guide/7.17" + apm-app-ref: "https://www.elastic.co/guide/en/kibana/current" + apm-agents-ref: "https://www.elastic.co/guide/en/apm/agent" + apm-android-ref: "https://www.elastic.co/guide/en/apm/agent/android/current" + apm-py-ref: "https://www.elastic.co/guide/en/apm/agent/python/current" + apm-py-ref-3x: "https://www.elastic.co/guide/en/apm/agent/python/3.x" + apm-node-ref-index: "https://www.elastic.co/guide/en/apm/agent/nodejs" + apm-node-ref: "https://www.elastic.co/guide/en/apm/agent/nodejs/current" + apm-node-ref-1x: "https://www.elastic.co/guide/en/apm/agent/nodejs/1.x" + apm-rum-ref: "https://www.elastic.co/guide/en/apm/agent/rum-js/current" + apm-ruby-ref: "https://www.elastic.co/guide/en/apm/agent/ruby/current" + apm-java-ref: "https://www.elastic.co/guide/en/apm/agent/java/current" + apm-go-ref: "https://www.elastic.co/guide/en/apm/agent/go/current" + apm-dotnet-ref: "https://www.elastic.co/guide/en/apm/agent/dotnet/current" + apm-php-ref: "https://www.elastic.co/guide/en/apm/agent/php/current" + apm-ios-ref: "https://www.elastic.co/guide/en/apm/agent/swift/current" + apm-lambda-ref: "https://www.elastic.co/guide/en/apm/lambda/current" + apm-attacher-ref: "https://www.elastic.co/guide/en/apm/attacher/current" + docker-logging-ref: "https://www.elastic.co/guide/en/beats/loggingplugin/current" + esf-ref: "https://www.elastic.co/guide/en/esf/current" + kinesis-firehose-ref: "https://www.elastic.co/guide/en/kinesis/{{kinesis_version}}" + estc-welcome-current: "https://www.elastic.co/guide/en/starting-with-the-elasticsearch-platform-and-its-solutions/current" + estc-welcome: "https://www.elastic.co/guide/en/starting-with-the-elasticsearch-platform-and-its-solutions/current" + estc-welcome-all: "https://www.elastic.co/guide/en/starting-with-the-elasticsearch-platform-and-its-solutions" + hadoop-ref: "https://www.elastic.co/guide/en/elasticsearch/hadoop/current" + stack-ref: "https://www.elastic.co/guide/en/elastic-stack/current" + stack-ref-67: "https://www.elastic.co/guide/en/elastic-stack/6.7" + stack-ref-68: "https://www.elastic.co/guide/en/elastic-stack/6.8" + stack-ref-70: "https://www.elastic.co/guide/en/elastic-stack/7.0" + stack-ref-80: "https://www.elastic.co/guide/en/elastic-stack/8.0" + stack-ov: "https://www.elastic.co/guide/en/elastic-stack-overview/current" + stack-gs: "https://www.elastic.co/guide/en/elastic-stack-get-started/current" + stack-gs-current: "https://www.elastic.co/guide/en/elastic-stack-get-started/current" + javaclient: "https://www.elastic.co/guide/en/elasticsearch/client/java-api/current" + java-api-client: "https://www.elastic.co/guide/en/elasticsearch/client/java-api-client/current" + java-rest: "https://www.elastic.co/guide/en/elasticsearch/client/java-rest/current" + jsclient: "https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current" + jsclient-current: "https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current" + es-ruby-client: "https://www.elastic.co/guide/en/elasticsearch/client/ruby-api/current" + es-dotnet-client: "https://www.elastic.co/guide/en/elasticsearch/client/net-api/current" + es-php-client: "https://www.elastic.co/guide/en/elasticsearch/client/php-api/current" + es-python-client: "https://www.elastic.co/guide/en/elasticsearch/client/python-api/current" + defguide: "https://www.elastic.co/guide/en/elasticsearch/guide/2.x" + painless: "https://www.elastic.co/guide/en/elasticsearch/painless/current" + plugins: 
"https://www.elastic.co/guide/en/elasticsearch/plugins/current" + plugins-8x: "https://www.elastic.co/guide/en/elasticsearch/plugins/8.1" + plugins-7x: "https://www.elastic.co/guide/en/elasticsearch/plugins/7.17" + plugins-6x: "https://www.elastic.co/guide/en/elasticsearch/plugins/6.8" + glossary: "https://www.elastic.co/guide/en/elastic-stack-glossary/current" + upgrade_guide: "https://www.elastic.co/products/upgrade_guide" + blog-ref: "https://www.elastic.co/blog/" + curator-ref: "https://www.elastic.co/guide/en/elasticsearch/client/curator/current" + curator-ref-current: "https://www.elastic.co/guide/en/elasticsearch/client/curator/current" + metrics-ref: "https://www.elastic.co/guide/en/metrics/current" + metrics-guide: "https://www.elastic.co/guide/en/metrics/guide/current" + logs-ref: "https://www.elastic.co/guide/en/logs/current" + logs-guide: "https://www.elastic.co/guide/en/logs/guide/current" + uptime-guide: "https://www.elastic.co/guide/en/uptime/current" + observability-guide: "https://www.elastic.co/guide/en/observability/current" + observability-guide-all: "https://www.elastic.co/guide/en/observability" + siem-guide: "https://www.elastic.co/guide/en/siem/guide/current" + security-guide: "https://www.elastic.co/guide/en/security/current" + security-guide-all: "https://www.elastic.co/guide/en/security" + endpoint-guide: "https://www.elastic.co/guide/en/endpoint/current" + sql-odbc: "https://www.elastic.co/guide/en/elasticsearch/sql-odbc/current" + ecs-ref: "https://www.elastic.co/guide/en/ecs/current" + ecs-logging-ref: "https://www.elastic.co/guide/en/ecs-logging/overview/current" + ecs-logging-go-logrus-ref: "https://www.elastic.co/guide/en/ecs-logging/go-logrus/current" + ecs-logging-go-zap-ref: "https://www.elastic.co/guide/en/ecs-logging/go-zap/current" + ecs-logging-go-zerolog-ref: "https://www.elastic.co/guide/en/ecs-logging/go-zap/current" + ecs-logging-java-ref: "https://www.elastic.co/guide/en/ecs-logging/java/current" + ecs-logging-dotnet-ref: "https://www.elastic.co/guide/en/ecs-logging/dotnet/current" + ecs-logging-nodejs-ref: "https://www.elastic.co/guide/en/ecs-logging/nodejs/current" + ecs-logging-php-ref: "https://www.elastic.co/guide/en/ecs-logging/php/current" + ecs-logging-python-ref: "https://www.elastic.co/guide/en/ecs-logging/python/current" + ecs-logging-ruby-ref: "https://www.elastic.co/guide/en/ecs-logging/ruby/current" + ml-docs: "https://www.elastic.co/guide/en/machine-learning/current" + eland-docs: "https://www.elastic.co/guide/en/elasticsearch/client/eland/current" + eql-ref: "https://eql.readthedocs.io/en/latest/query-guide" + extendtrial: "https://www.elastic.co/trialextension" + wikipedia: "https://en.wikipedia.org/wiki" + forum: "https://discuss.elastic.co/" + xpack-forum: "https://discuss.elastic.co/c/50-x-pack" + security-forum: "https://discuss.elastic.co/c/x-pack/shield" + watcher-forum: "https://discuss.elastic.co/c/x-pack/watcher" + monitoring-forum: "https://discuss.elastic.co/c/x-pack/marvel" + graph-forum: "https://discuss.elastic.co/c/x-pack/graph" + apm-forum: "https://discuss.elastic.co/c/apm" + enterprise-search-ref: "https://www.elastic.co/guide/en/enterprise-search/current" + app-search-ref: "https://www.elastic.co/guide/en/app-search/current" + workplace-search-ref: "https://www.elastic.co/guide/en/workplace-search/current" + enterprise-search-node-ref: "https://www.elastic.co/guide/en/enterprise-search-clients/enterprise-search-node/current" + enterprise-search-php-ref: 
"https://www.elastic.co/guide/en/enterprise-search-clients/php/current" + enterprise-search-python-ref: "https://www.elastic.co/guide/en/enterprise-search-clients/python/current" + enterprise-search-ruby-ref: "https://www.elastic.co/guide/en/enterprise-search-clients/ruby/current" + elastic-maps-service: "https://maps.elastic.co" + integrations-docs: "https://docs.elastic.co/en/integrations" + integrations-devguide: "https://www.elastic.co/guide/en/integrations-developer/current" + time-units: "https://www.elastic.co/guide/en/elasticsearch/reference/current/api-conventions.html#time-units" + byte-units: "https://www.elastic.co/guide/en/elasticsearch/reference/current/api-conventions.html#byte-units" + apm-py-ref-v: "https://www.elastic.co/guide/en/apm/agent/python/current" + apm-node-ref-v: "https://www.elastic.co/guide/en/apm/agent/nodejs/current" + apm-rum-ref-v: "https://www.elastic.co/guide/en/apm/agent/rum-js/current" + apm-ruby-ref-v: "https://www.elastic.co/guide/en/apm/agent/ruby/current" + apm-java-ref-v: "https://www.elastic.co/guide/en/apm/agent/java/current" + apm-go-ref-v: "https://www.elastic.co/guide/en/apm/agent/go/current" + apm-ios-ref-v: "https://www.elastic.co/guide/en/apm/agent/swift/current" + apm-dotnet-ref-v: "https://www.elastic.co/guide/en/apm/agent/dotnet/current" + apm-php-ref-v: "https://www.elastic.co/guide/en/apm/agent/php/current" + ecloud: "Elastic Cloud" + esf: "Elastic Serverless Forwarder" + ess: "Elasticsearch Service" + ece: "Elastic Cloud Enterprise" + eck: "Elastic Cloud on Kubernetes" + serverless-full: "Elastic Cloud Serverless" + serverless-short: "Serverless" + es-serverless: "Elasticsearch Serverless" + es3: "Elasticsearch Serverless" + obs-serverless: "Elastic Observability Serverless" + sec-serverless: "Elastic Security Serverless" + serverless-docs: "https://docs.elastic.co/serverless" + cloud: "https://www.elastic.co/guide/en/cloud/current" + ess-utm-params: "?page=docs&placement=docs-body" + ess-baymax: "?page=docs&placement=docs-body" + ess-trial: "https://cloud.elastic.co/registration?page=docs&placement=docs-body" + ess-product: "https://www.elastic.co/cloud/elasticsearch-service?page=docs&placement=docs-body" + ess-console: "https://cloud.elastic.co?page=docs&placement=docs-body" + ess-console-name: "Elasticsearch Service Console" + ess-deployments: "https://cloud.elastic.co/deployments?page=docs&placement=docs-body" + ece-ref: "https://www.elastic.co/guide/en/cloud-enterprise/current" + eck-ref: "https://www.elastic.co/guide/en/cloud-on-k8s/current" + ess-leadin: "You can run Elasticsearch on your own hardware or use our hosted Elasticsearch Service that is available on AWS, GCP, and Azure. https://cloud.elastic.co/registration{ess-utm-params}[Try the Elasticsearch Service for free]." + ess-leadin-short: "Our hosted Elasticsearch Service is available on AWS, GCP, and Azure, and you can https://cloud.elastic.co/registration{ess-utm-params}[try it for free]." 
+ ess-icon: "image:https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg[link=\"https://cloud.elastic.co/registration{ess-utm-params}\", title=\"Supported on Elasticsearch Service\"]" + ece-icon: "image:https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud_ece.svg[link=\"https://cloud.elastic.co/registration{ess-utm-params}\", title=\"Supported on Elastic Cloud Enterprise\"]" + cloud-only: "This feature is designed for indirect use by https://cloud.elastic.co/registration{ess-utm-params}[Elasticsearch Service], https://www.elastic.co/guide/en/cloud-enterprise/{ece-version-link}[Elastic Cloud Enterprise], and https://www.elastic.co/guide/en/cloud-on-k8s/current[Elastic Cloud on Kubernetes]. Direct use is not supported." + ess-setting-change: "image:https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg[link=\"{ess-trial}\", title=\"Supported on {ess}\"] indicates a change to a supported https://www.elastic.co/guide/en/cloud/current/ec-add-user-settings.html[user setting] for Elasticsearch Service." + ess-skip-section: "If you use Elasticsearch Service, skip this section. Elasticsearch Service handles these changes for you." + api-cloud: "https://www.elastic.co/docs/api/doc/cloud" + api-ece: "https://www.elastic.co/docs/api/doc/cloud-enterprise" + api-kibana-serverless: "https://www.elastic.co/docs/api/doc/serverless" + es-feature-flag: "This feature is in development and not yet available for use. This documentation is provided for informational purposes only." + es-ref-dir: "'{{elasticsearch-root}}/docs/reference'" + apm-app: "APM app" + uptime-app: "Uptime app" + synthetics-app: "Synthetics app" + logs-app: "Logs app" + metrics-app: "Metrics app" + infrastructure-app: "Infrastructure app" + siem-app: "SIEM app" + security-app: "Elastic Security app" + ml-app: "Machine Learning" + dev-tools-app: "Dev Tools" + ingest-manager-app: "Ingest Manager" + stack-manage-app: "Stack Management" + stack-monitor-app: "Stack Monitoring" + alerts-ui: "Alerts and Actions" + rules-ui: "Rules" + rac-ui: "Rules and Connectors" + connectors-ui: "Connectors" + connectors-feature: "Actions and Connectors" + stack-rules-feature: "Stack Rules" + user-experience: "User Experience" + ems: "Elastic Maps Service" + ems-init: "EMS" + hosted-ems: "Elastic Maps Server" + ipm-app: "Index Pattern Management" + ingest-pipelines: "ingest pipelines" + ingest-pipelines-app: "Ingest Pipelines" + ingest-pipelines-cap: "Ingest pipelines" + ls-pipelines: "Logstash pipelines" + ls-pipelines-app: "Logstash Pipelines" + maint-windows: "maintenance windows" + maint-windows-app: "Maintenance Windows" + maint-windows-cap: "Maintenance windows" + custom-roles-app: "Custom Roles" + data-source: "data view" + data-sources: "data views" + data-source-caps: "Data View" + data-sources-caps: "Data Views" + data-source-cap: "Data view" + data-sources-cap: "Data views" + project-settings: "Project settings" + manage-app: "Management" + index-manage-app: "Index Management" + data-views-app: "Data Views" + rules-app: "Rules" + saved-objects-app: "Saved Objects" + tags-app: "Tags" + api-keys-app: "API keys" + transforms-app: "Transforms" + connectors-app: "Connectors" + files-app: "Files" + reports-app: "Reports" + maps-app: "Maps" + alerts-app: "Alerts" + crawler: "Enterprise Search web crawler" + ents: "Enterprise Search" + app-search-crawler: "App Search web crawler" + agent: "Elastic Agent" + agents: "Elastic Agents" + fleet: "Fleet" + fleet-server: "Fleet Server" + integrations-server: "Integrations Server" + ingest-manager: 
"Ingest Manager" + ingest-management: "ingest management" + package-manager: "Elastic Package Manager" + integrations: "Integrations" + package-registry: "Elastic Package Registry" + artifact-registry: "Elastic Artifact Registry" + aws: "AWS" + stack: "Elastic Stack" + xpack: "X-Pack" + es: "Elasticsearch" + kib: "Kibana" + esms: "Elastic Stack Monitoring Service" + esms-init: "ESMS" + ls: "Logstash" + beats: "Beats" + auditbeat: "Auditbeat" + filebeat: "Filebeat" + heartbeat: "Heartbeat" + metricbeat: "Metricbeat" + packetbeat: "Packetbeat" + winlogbeat: "Winlogbeat" + functionbeat: "Functionbeat" + journalbeat: "Journalbeat" + es-sql: "Elasticsearch SQL" + esql: "ES|QL" + elastic-agent: "Elastic Agent" + k8s: "Kubernetes" + log-driver-long: "Elastic Logging Plugin for Docker" + security: "X-Pack security" + security-features: "security features" + operator-feature: "operator privileges feature" + es-security-features: "Elasticsearch security features" + stack-security-features: "Elastic Stack security features" + endpoint-sec: "Endpoint Security" + endpoint-cloud-sec: "Endpoint and Cloud Security" + elastic-defend: "Elastic Defend" + elastic-sec: "Elastic Security" + elastic-endpoint: "Elastic Endpoint" + swimlane: "Swimlane" + sn: "ServiceNow" + sn-itsm: "ServiceNow ITSM" + sn-itom: "ServiceNow ITOM" + sn-sir: "ServiceNow SecOps" + jira: "Jira" + ibm-r: "IBM Resilient" + webhook: "Webhook" + webhook-cm: "Webhook - Case Management" + opsgenie: "Opsgenie" + bedrock: "Amazon Bedrock" + gemini: "Google Gemini" + hive: "TheHive" + monitoring: "X-Pack monitoring" + monitor-features: "monitoring features" + stack-monitor-features: "Elastic Stack monitoring features" + watcher: "Watcher" + alert-features: "alerting features" + reporting: "X-Pack reporting" + report-features: "reporting features" + graph: "X-Pack graph" + graph-features: "graph analytics features" + searchprofiler: "Search Profiler" + xpackml: "X-Pack machine learning" + ml: "machine learning" + ml-cap: "Machine learning" + ml-init: "ML" + ml-features: "machine learning features" + stack-ml-features: "Elastic Stack machine learning features" + ccr: "cross-cluster replication" + ccr-cap: "Cross-cluster replication" + ccr-init: "CCR" + ccs: "cross-cluster search" + ccs-cap: "Cross-cluster search" + ccs-init: "CCS" + ilm: "index lifecycle management" + ilm-cap: "Index lifecycle management" + ilm-init: "ILM" + dlm: "data lifecycle management" + dlm-cap: "Data lifecycle management" + dlm-init: "DLM" + search-snap: "searchable snapshot" + search-snaps: "searchable snapshots" + search-snaps-cap: "Searchable snapshots" + slm: "snapshot lifecycle management" + slm-cap: "Snapshot lifecycle management" + slm-init: "SLM" + rollup-features: "data rollup features" + ipm: "index pattern management" + ipm-cap: "Index pattern" + rollup: "rollup" + rollup-cap: "Rollup" + rollups: "rollups" + rollups-cap: "Rollups" + rollup-job: "rollup job" + rollup-jobs: "rollup jobs" + rollup-jobs-cap: "Rollup jobs" + dfeed: "datafeed" + dfeeds: "datafeeds" + dfeed-cap: "Datafeed" + dfeeds-cap: "Datafeeds" + ml-jobs: "machine learning jobs" + ml-jobs-cap: "Machine learning jobs" + anomaly-detect: "anomaly detection" + anomaly-detect-cap: "Anomaly detection" + anomaly-job: "anomaly detection job" + anomaly-jobs: "anomaly detection jobs" + anomaly-jobs-cap: "Anomaly detection jobs" + dataframe: "data frame" + dataframes: "data frames" + dataframe-cap: "Data frame" + dataframes-cap: "Data frames" + watcher-transform: "payload transform" + watcher-transforms: 
"payload transforms" + watcher-transform-cap: "Payload transform" + watcher-transforms-cap: "Payload transforms" + transform: "transform" + transforms: "transforms" + transform-cap: "Transform" + transforms-cap: "Transforms" + dataframe-transform: "transform" + dataframe-transform-cap: "Transform" + dataframe-transforms: "transforms" + dataframe-transforms-cap: "Transforms" + dfanalytics-cap: "Data frame analytics" + dfanalytics: "data frame analytics" + dataframe-analytics-config: "'{dataframe} analytics config'" + dfanalytics-job: "'{dataframe} analytics job'" + dfanalytics-jobs: "'{dataframe} analytics jobs'" + dfanalytics-jobs-cap: "'{dataframe-cap} analytics jobs'" + cdataframe: "continuous data frame" + cdataframes: "continuous data frames" + cdataframe-cap: "Continuous data frame" + cdataframes-cap: "Continuous data frames" + cdataframe-transform: "continuous transform" + cdataframe-transforms: "continuous transforms" + cdataframe-transforms-cap: "Continuous transforms" + ctransform: "continuous transform" + ctransform-cap: "Continuous transform" + ctransforms: "continuous transforms" + ctransforms-cap: "Continuous transforms" + oldetection: "outlier detection" + oldetection-cap: "Outlier detection" + olscore: "outlier score" + olscores: "outlier scores" + fiscore: "feature influence score" + evaluatedf-api: "evaluate {dataframe} analytics API" + evaluatedf-api-cap: "Evaluate {dataframe} analytics API" + binarysc: "binary soft classification" + binarysc-cap: "Binary soft classification" + regression: "regression" + regression-cap: "Regression" + reganalysis: "regression analysis" + reganalysis-cap: "Regression analysis" + depvar: "dependent variable" + feature-var: "feature variable" + feature-vars: "feature variables" + feature-vars-cap: "Feature variables" + classification: "classification" + classification-cap: "Classification" + classanalysis: "classification analysis" + classanalysis-cap: "Classification analysis" + infer-cap: "Inference" + infer: "inference" + lang-ident-cap: "Language identification" + lang-ident: "language identification" + data-viz: "Data Visualizer" + file-data-viz: "File Data Visualizer" + feat-imp: "feature importance" + feat-imp-cap: "Feature importance" + nlp: "natural language processing" + nlp-cap: "Natural language processing" + apm-agent: "APM agent" + apm-go-agent: "Elastic APM Go agent" + apm-go-agents: "Elastic APM Go agents" + apm-ios-agent: "Elastic APM iOS agent" + apm-ios-agents: "Elastic APM iOS agents" + apm-java-agent: "Elastic APM Java agent" + apm-java-agents: "Elastic APM Java agents" + apm-dotnet-agent: "Elastic APM .NET agent" + apm-dotnet-agents: "Elastic APM .NET agents" + apm-node-agent: "Elastic APM Node.js agent" + apm-node-agents: "Elastic APM Node.js agents" + apm-php-agent: "Elastic APM PHP agent" + apm-php-agents: "Elastic APM PHP agents" + apm-py-agent: "Elastic APM Python agent" + apm-py-agents: "Elastic APM Python agents" + apm-ruby-agent: "Elastic APM Ruby agent" + apm-ruby-agents: "Elastic APM Ruby agents" + apm-rum-agent: "Elastic APM Real User Monitoring (RUM) JavaScript agent" + apm-rum-agents: "Elastic APM RUM JavaScript agents" + apm-lambda-ext: "Elastic APM AWS Lambda extension" + project-monitors: "project monitors" + project-monitors-cap: "Project monitors" + private-location: "Private Location" + private-locations: "Private Locations" + pwd: "YOUR_PASSWORD" + esh: "ES-Hadoop" + default-dist: "default distribution" + oss-dist: "OSS-only distribution" + observability: "Observability" + api-request-title: 
"Request" + api-prereq-title: "Prerequisites" + api-description-title: "Description" + api-path-parms-title: "Path parameters" + api-query-parms-title: "Query parameters" + api-request-body-title: "Request body" + api-response-codes-title: "Response codes" + api-response-body-title: "Response body" + api-example-title: "Example" + api-examples-title: "Examples" + api-definitions-title: "Properties" + multi-arg: "†footnoteref:[multi-arg,This parameter accepts multiple arguments.]" + multi-arg-ref: "†footnoteref:[multi-arg]" + yes-icon: "image:https://doc-icons.s3.us-east-2.amazonaws.com/icon-yes.png[Yes,20,15]" + no-icon: "image:https://doc-icons.s3.us-east-2.amazonaws.com/icon-no.png[No,20,15]" + es-repo: "https://github.com/elastic/elasticsearch/" + es-issue: "https://github.com/elastic/elasticsearch/issues/" + es-pull: "https://github.com/elastic/elasticsearch/pull/" + es-commit: "https://github.com/elastic/elasticsearch/commit/" + kib-repo: "https://github.com/elastic/kibana/" + kib-issue: "https://github.com/elastic/kibana/issues/" + kibana-issue: "'{kib-repo}issues/'" + kib-pull: "https://github.com/elastic/kibana/pull/" + kibana-pull: "'{kib-repo}pull/'" + kib-commit: "https://github.com/elastic/kibana/commit/" + ml-repo: "https://github.com/elastic/ml-cpp/" + ml-issue: "https://github.com/elastic/ml-cpp/issues/" + ml-pull: "https://github.com/elastic/ml-cpp/pull/" + ml-commit: "https://github.com/elastic/ml-cpp/commit/" + apm-repo: "https://github.com/elastic/apm-server/" + apm-issue: "https://github.com/elastic/apm-server/issues/" + apm-pull: "https://github.com/elastic/apm-server/pull/" + kibana-blob: "https://github.com/elastic/kibana/blob/current/" + apm-get-started-ref: "https://www.elastic.co/guide/en/apm/get-started/current" + apm-server-ref: "https://www.elastic.co/guide/en/apm/server/current" + apm-server-ref-v: "https://www.elastic.co/guide/en/apm/server/current" + apm-server-ref-m: "https://www.elastic.co/guide/en/apm/server/master" + apm-server-ref-62: "https://www.elastic.co/guide/en/apm/server/6.2" + apm-server-ref-64: "https://www.elastic.co/guide/en/apm/server/6.4" + apm-server-ref-70: "https://www.elastic.co/guide/en/apm/server/7.0" + apm-overview-ref-v: "https://www.elastic.co/guide/en/apm/get-started/current" + apm-overview-ref-70: "https://www.elastic.co/guide/en/apm/get-started/7.0" + apm-overview-ref-m: "https://www.elastic.co/guide/en/apm/get-started/master" + infra-guide: "https://www.elastic.co/guide/en/infrastructure/guide/current" + a-data-source: "a data view" + icon-bug: "pass:[]" + icon-checkInCircleFilled: "pass:[]" + icon-warningFilled: "pass:[]" diff --git a/docs/extend/creating-classic-plugins.md b/docs/extend/creating-classic-plugins.md new file mode 100644 index 0000000000000..16e4c38f3f087 --- /dev/null +++ b/docs/extend/creating-classic-plugins.md @@ -0,0 +1,68 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/creating-classic-plugins.html +--- + +# Creating classic plugins [creating-classic-plugins] + +Classic plugins provide {{es}} with mechanisms for custom authentication, authorization, scoring, and more. + +::::{admonition} Plugin release lifecycle +:class: important + +Classic plugins require you to build a new version for each new {{es}} release. This version is checked when the plugin is installed and when it is loaded. 
{{es}} will refuse to start in the presence of plugins with the incorrect `elasticsearch.version`. + +:::: + + + +## Classic plugin file structure [_classic_plugin_file_structure] + +Classic plugins are ZIP files composed of JAR files and [a metadata file called `plugin-descriptor.properties`](/extend/plugin-descriptor-file-classic.md), a Java properties file that describes the plugin. + +Note that only JAR files at the root of the plugin are added to the classpath for the plugin. If you need other resources, package them into a resources JAR. + + +## Example plugins [_example_plugins] + +The {{es}} repository contains [examples of plugins](https://github.com/elastic/elasticsearch/tree/main/plugins/examples). Some of these include: + +* a plugin with [custom settings](https://github.com/elastic/elasticsearch/tree/main/plugins/examples/custom-settings) +* a plugin with a [custom ingest processor](https://github.com/elastic/elasticsearch/tree/main/plugins/examples/custom-processor) +* adding [custom rest endpoints](https://github.com/elastic/elasticsearch/tree/main/plugins/examples/rest-handler) +* adding a [custom rescorer](https://github.com/elastic/elasticsearch/tree/main/plugins/examples/rescore) +* a script [implemented in Java](https://github.com/elastic/elasticsearch/tree/main/plugins/examples/script-expert-scoring) + +These examples provide the bare bones needed to get started. For more information about how to write a plugin, we recommend looking at the [source code of existing plugins](https://github.com/elastic/elasticsearch/tree/main/plugins/) for inspiration. + + +## Testing your plugin [_testing_your_plugin] + +Use `bin/elasticsearch-plugin install file:///path/to/your/plugin` to install your plugin for testing. The Java plugin is auto-loaded only if it’s in the `plugins/` directory. + + +## Java Security permissions [plugin-authors-jsm] + +Some plugins may need additional security permissions. A plugin can include the optional `plugin-security.policy` file containing `grant` statements for additional permissions. Any additional permissions will be displayed to the user with a large warning, and they will have to confirm them when installing the plugin interactively. So if possible, it is best to avoid requesting any spurious permissions! + +If you are using the {{es}} Gradle build system, place this file in `src/main/plugin-metadata` and it will be applied during unit tests as well. + +The Java security model is stack-based, and additional permissions are granted to the jars in your plugin, so you have to write proper security code around operations requiring elevated privileges. You might add a check to prevent unprivileged code (such as scripts) from gaining escalated permissions. For example: + +```java +// ES permission you should check before doPrivileged() blocks +import org.elasticsearch.SpecialPermission; + +SecurityManager sm = System.getSecurityManager(); +if (sm != null) { + // unprivileged code such as scripts do not have SpecialPermission + sm.checkPermission(new SpecialPermission()); +} +AccessController.doPrivileged( + // sensitive operation +); +``` + +Check [Secure Coding Guidelines for Java SE](https://www.oracle.com/technetwork/java/seccodeguide-139067.md) for more information. 
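+
+As a minimal sketch (not taken from any existing plugin), the pattern above could be wrapped in a small helper, assuming a plugin needs to read a configuration file with elevated privileges; the class, method, and path names here are illustrative only:
+
+```java
+package org.example;
+
+import org.elasticsearch.SpecialPermission;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+
+public class PrivilegedConfigReader {
+
+    // Hypothetical helper: reads a file on behalf of possibly-unprivileged callers.
+    public static String readConfig(Path configFile) {
+        SecurityManager sm = System.getSecurityManager();
+        if (sm != null) {
+            // Unprivileged code such as scripts does not hold SpecialPermission,
+            // so this check stops it from reaching the doPrivileged() block below.
+            sm.checkPermission(new SpecialPermission());
+        }
+        return AccessController.doPrivileged((PrivilegedAction<String>) () -> {
+            try {
+                // The sensitive operation runs with the plugin's own permissions.
+                return Files.readString(configFile);
+            } catch (IOException e) {
+                throw new UncheckedIOException(e);
+            }
+        });
+    }
+}
+```
+
+Keep the body of the `doPrivileged()` block as small as possible, so that only the sensitive operation itself runs with elevated privileges.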
+ + diff --git a/docs/extend/creating-stable-plugins.md b/docs/extend/creating-stable-plugins.md new file mode 100644 index 0000000000000..8fb8b7825cb25 --- /dev/null +++ b/docs/extend/creating-stable-plugins.md @@ -0,0 +1,91 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/creating-stable-plugins.html +--- + +# Creating text analysis plugins with the stable plugin API [creating-stable-plugins] + +Text analysis plugins provide {{es}} with custom [Lucene analyzers, token filters, character filters, and tokenizers](docs-content://manage-data/data-store/text-analysis.md). + + +## The stable plugin API [_the_stable_plugin_api] + +Text analysis plugins can be developed against the stable plugin API. This API consists of the following dependencies: + +* `plugin-api` - an API used by plugin developers to implement custom {{es}} plugins. +* `plugin-analysis-api` - an API used by plugin developers to implement analysis plugins and integrate them into {{es}}. +* `lucene-analysis-common` - a dependency of `plugin-analysis-api` that contains core Lucene analysis interfaces like `Tokenizer`, `Analyzer`, and `TokenStream`. + +For new versions of {{es}} within the same major version, plugins built against this API does not need to be recompiled. Future versions of the API will be backwards compatible and plugins are binary compatible with future versions of {{es}}. In other words, once you have a working artifact, you can re-use it when you upgrade {{es}} to a new bugfix or minor version. + +A text analysis plugin can implement four factory classes that are provided by the analysis plugin API. + +* `AnalyzerFactory` to create a Lucene analyzer +* `CharFilterFactory` to create a character character filter +* `TokenFilterFactory` to create a Lucene token filter +* `TokenizerFactory` to create a Lucene tokenizer + +The key to implementing a stable plugin is the `@NamedComponent` annotation. Many of {{es}}'s components have names that are used in configurations. For example, the keyword analyzer is referenced in configuration with the name `"keyword"`. Once your custom plugin is installed in your cluster, your named components may be referenced by name in these configurations as well. + +You can also create text analysis plugins as a [classic plugin](/extend/creating-classic-plugins.md). However, classic plugins are pinned to a specific version of {{es}}. You need to recompile them when upgrading {{es}}. Because classic plugins are built against internal APIs that can change, upgrading to a new version may require code changes. + + +## Stable plugin file structure [_stable_plugin_file_structure] + +Stable plugins are ZIP files composed of JAR files and two metadata files: + +* `stable-plugin-descriptor.properties` - a Java properties file that describes the plugin. Refer to [The plugin descriptor file for stable plugins](/extend/plugin-descriptor-file-stable.md). +* `named_components.json` - a JSON file mapping interfaces to key-value pairs of component names and implementation classes. + +Note that only JAR files at the root of the plugin are added to the classpath for the plugin. If you need other resources, package them into a resources JAR. + + +## Development process [_development_process] + +Elastic provides a Gradle plugin, `elasticsearch.stable-esplugin`, that makes it easier to develop and package stable plugins. The steps in this section assume you use this plugin. However, you don’t need Gradle to create plugins. 
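+
+For orientation before the build setup, here is a minimal sketch (illustrative names, not an existing component) of the kind of class a stable plugin packages: a `@NamedComponent` token filter factory that simply wraps Lucene's `ReverseStringFilter`. A full walk-through of a similar factory is given in [Example text analysis plugin](/extend/example-text-analysis-plugin.md).
+
+```java
+package org.example;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.reverse.ReverseStringFilter;
+import org.elasticsearch.plugin.NamedComponent;
+import org.elasticsearch.plugin.analysis.TokenFilterFactory;
+
+// Registered under the name "reverse_example"; analyzer configurations can reference it by that name.
+@NamedComponent(value = "reverse_example")
+public class ReverseExampleTokenFilterFactory implements TokenFilterFactory {
+
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        // Delegate to a stock Lucene filter from lucene-analysis-common that reverses each token.
+        return new ReverseStringFilter(tokenStream);
+    }
+}
+```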
+ +The {{es}} Github repository contains [an example analysis plugin](https://github.com/elastic/elasticsearch/tree/main/plugins/examples/stable-analysis). The example `build.gradle` build script provides a good starting point for developing your own plugin. + + +### Prerequisites [_prerequisites] + +Plugins are written in Java, so you need to install a Java Development Kit (JDK). Install Gradle if you want to use Gradle. + + +### Step by step [_step_by_step] + +1. Create a directory for your project. +2. Copy the example `build.gradle` build script to your project directory. Note that this build script uses the `elasticsearch.stable-esplugin` gradle plugin to build your plugin. +3. Edit the `build.gradle` build script: + + * Add a definition for the `pluginApiVersion` and matching `luceneVersion` variables to the top of the file. You can find these versions in the `build-tools-internal/version.properties` file in the [Elasticsearch Github repository](https://github.com/elastic/elasticsearch/). + * Edit the `name` and `description` in the `esplugin` section of the build script. This will create the plugin descriptor file. If you’re not using the `elasticsearch.stable-esplugin` gradle plugin, refer to [The plugin descriptor file for stable plugins](/extend/plugin-descriptor-file-stable.md) to create the file manually. + * Add module information. + * Ensure you have declared the following compile-time dependencies. These dependencies are compile-time only because {{es}} will provide these libraries at runtime. + + * `org.elasticsearch.plugin:elasticsearch-plugin-api` + * `org.elasticsearch.plugin:elasticsearch-plugin-analysis-api` + * `org.apache.lucene:lucene-analysis-common` + + * For unit testing, ensure these dependencies have also been added to the `build.gradle` script as `testImplementation` dependencies. + +4. Implement an interface from the analysis plugin API, annotating it with `NamedComponent`. Refer to [Example text analysis plugin](/extend/example-text-analysis-plugin.md) for an example. +5. You should now be able to assemble a plugin ZIP file by running: + + ```sh + gradle bundlePlugin + ``` + + The resulting plugin ZIP file is written to the `build/distributions` directory. + + + +### YAML REST tests [_yaml_rest_tests] + +The Gradle `elasticsearch.yaml-rest-test` plugin enables testing of your plugin using the [{{es}} yamlRestTest framework](https://github.com/elastic/elasticsearch/blob/main/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc). These tests use a YAML-formatted domain language to issue REST requests against an internal {{es}} cluster that has your plugin installed, and to check the results of those requests. The structure of a YAML REST test directory is as follows: + +* A test suite class, defined under `src/yamlRestTest/java`. This class should extend `ESClientYamlSuiteTestCase`. +* The YAML tests themselves should be defined under `src/yamlRestTest/resources/test/`. + + + diff --git a/docs/extend/example-text-analysis-plugin.md b/docs/extend/example-text-analysis-plugin.md new file mode 100644 index 0000000000000..8943e39c356b0 --- /dev/null +++ b/docs/extend/example-text-analysis-plugin.md @@ -0,0 +1,190 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/example-text-analysis-plugin.html +--- + +# Example text analysis plugin [example-text-analysis-plugin] + +This example shows how to create a simple "Hello world" text analysis plugin using the stable plugin API. 
The plugin provides a custom Lucene token filter that strips all tokens except for "hello" and "world". + +Elastic provides a Grade plugin, `elasticsearch.stable-esplugin`, that makes it easier to develop and package stable plugins. The steps in this guide assume you use this plugin. However, you don’t need Gradle to create plugins. + +1. Create a new directory for your project. +2. In this example, the source code is organized under the `main` and `test` directories. In your project’s home directory, create `src/` `src/main/`, and `src/test/` directories. +3. Create the following `build.gradle` build script in your project’s home directory: + + ```gradle + ext.pluginApiVersion = '8.7.0' + ext.luceneVersion = '9.5.0' + + buildscript { + ext.pluginApiVersion = '8.7.0' + repositories { + mavenCentral() + } + dependencies { + classpath "org.elasticsearch.gradle:build-tools:${pluginApiVersion}" + } + } + + apply plugin: 'elasticsearch.stable-esplugin' + apply plugin: 'elasticsearch.yaml-rest-test' + + esplugin { + name 'my-plugin' + description 'My analysis plugin' + } + + group 'org.example' + version '1.0-SNAPSHOT' + + repositories { + mavenLocal() + mavenCentral() + } + + dependencies { + + //TODO transitive dependency off and plugin-api dependency? + compileOnly "org.elasticsearch.plugin:elasticsearch-plugin-api:${pluginApiVersion}" + compileOnly "org.elasticsearch.plugin:elasticsearch-plugin-analysis-api:${pluginApiVersion}" + compileOnly "org.apache.lucene:lucene-analysis-common:${luceneVersion}" + + //TODO for testing this also have to be declared + testImplementation "org.elasticsearch.plugin:elasticsearch-plugin-api:${pluginApiVersion}" + testImplementation "org.elasticsearch.plugin:elasticsearch-plugin-analysis-api:${pluginApiVersion}" + testImplementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}" + + testImplementation ('junit:junit:4.13.2'){ + exclude group: 'org.hamcrest' + } + testImplementation 'org.mockito:mockito-core:4.4.0' + testImplementation 'org.hamcrest:hamcrest:2.2' + + } + ``` + +4. In `src/main/java/org/example/`, create `HelloWorldTokenFilter.java`. This file provides the code for a token filter that strips all tokens except for "hello" and "world": + + ```java + package org.example; + + import org.apache.lucene.analysis.FilteringTokenFilter; + import org.apache.lucene.analysis.TokenStream; + import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + + import java.util.Arrays; + + public class HelloWorldTokenFilter extends FilteringTokenFilter { + private final CharTermAttribute term = addAttribute(CharTermAttribute.class); + + public HelloWorldTokenFilter(TokenStream input) { + super(input); + } + + @Override + public boolean accept() { + if (term.length() != 5) return false; + return Arrays.equals(term.buffer(), 0, 4, "hello".toCharArray(), 0, 4) + || Arrays.equals(term.buffer(), 0, 4, "world".toCharArray(), 0, 4); + } + } + ``` + +5. This filter can be provided to Elasticsearch using the following `HelloWorldTokenFilterFactory.java` factory class. The `@NamedComponent` annotation is used to give the filter the `hello_world` name. This is the name you can use to refer to the filter, once the plugin has been deployed. 
+ + ```java + package org.example; + + import org.apache.lucene.analysis.TokenStream; + import org.elasticsearch.plugin.analysis.TokenFilterFactory; + import org.elasticsearch.plugin.NamedComponent; + + @NamedComponent(value = "hello_world") + public class HelloWorldTokenFilterFactory implements TokenFilterFactory { + + @Override + public TokenStream create(TokenStream tokenStream) { + return new HelloWorldTokenFilter(tokenStream); + } + + } + ``` + +6. Unit tests may go under the `src/test` directory. You will have to add dependencies for your preferred testing framework. +7. Run: + + ```sh + gradle bundlePlugin + ``` + + This builds the JAR file, generates the metadata files, and bundles them into a plugin ZIP file. The resulting ZIP file will be written to the `build/distributions` directory. + +8. [Install the plugin](/reference/elasticsearch-plugins/plugin-management.md). +9. You can use the `_analyze` API to verify that the `hello_world` token filter works as expected: + + ```console + GET /_analyze + { + "text": "hello to everyone except the world", + "tokenizer": "standard", + "filter": ["hello_world"] + } + ``` + + + +## YAML REST tests [_yaml_rest_tests_2] + +If you are using the `elasticsearch.stable-esplugin` plugin for Gradle, you can use {{es}}'s YAML Rest Test framework. This framework allows you to load your plugin in a running test cluster and issue real REST API queries against it. The full syntax for this framework is beyond the scope of this tutorial, but there are many examples in the Elasticsearch repository. Refer to the [example analysis plugin](https://github.com/elastic/elasticsearch/tree/main/plugins/examples/stable-analysis) in the {{es}} Github repository for an example. + +1. Create a `yamlRestTest` directory in the `src` directory. +2. Under the `yamlRestTest` directory, create a `java` folder for Java sources and a `resources` folder. +3. In `src/yamlRestTest/java/org/example/`, create `HelloWorldPluginClientYamlTestSuiteIT.java`. This class implements `ESClientYamlSuiteTestCase`. + + ```java + import com.carrotsearch.randomizedtesting.annotations.Name; + import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; + import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; + + public class HelloWorldPluginClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { + + public HelloWorldPluginClientYamlTestSuiteIT( + @Name("yaml") ClientYamlTestCandidate testCandidate + ) { + super(testCandidate); + } + + @ParametersFactory + public static Iterable parameters() throws Exception { + return ESClientYamlSuiteTestCase.createParameters(); + } + } + ``` + +4. In `src/yamlRestTest/resources/rest-api-spec/test/plugin`, create the `10_token_filter.yml` YAML file: + + ```yaml + ## Sample rest test + --- + "Hello world plugin test - removes all tokens except hello and world": + - do: + indices.analyze: + body: + text: hello to everyone except the world + tokenizer: standard + filter: + - type: "hello_world" + - length: { tokens: 2 } + - match: { tokens.0.token: "hello" } + - match: { tokens.1.token: "world" } + ``` + +5. 
Run the test with: + + ```sh + gradle yamlRestTest + ``` + + diff --git a/docs/extend/index.md b/docs/extend/index.md new file mode 100644 index 0000000000000..34741d2e035a2 --- /dev/null +++ b/docs/extend/index.md @@ -0,0 +1,19 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/plugin-authors.html +--- + +# Create Elasticsearch plugins [plugin-authors] + +{{es}} plugins are modular bits of code that add functionality to {{es}}. Plugins are written in Java and implement Java interfaces that are defined in the source code. Plugins are composed of JAR files and metadata files, compressed in a single zip file. + +There are two ways to create a plugin: + +[Creating text analysis plugins with the stable plugin API](/extend/creating-stable-plugins.md) +: Text analysis plugins can be developed against the stable plugin API to provide {{es}} with custom Lucene analyzers, token filters, character filters, and tokenizers. + +[Creating classic plugins](/extend/creating-classic-plugins.md) +: Other plugins can be developed against the classic plugin API to provide custom authentication, authorization, or scoring mechanisms, and more. + + + diff --git a/docs/extend/plugin-descriptor-file-classic.md b/docs/extend/plugin-descriptor-file-classic.md new file mode 100644 index 0000000000000..1ddf123228206 --- /dev/null +++ b/docs/extend/plugin-descriptor-file-classic.md @@ -0,0 +1,91 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/plugin-descriptor-file-classic.html +--- + +# The plugin descriptor file for classic plugins [plugin-descriptor-file-classic] + +The classic plugin descriptor file is a Java properties file called `plugin-descriptor.properties` that describes the plugin. The file is automatically created if you are using {{es}}'s Gradle build system. If you’re not using the gradle plugin, you can create it manually using the following template. + +```yaml +# Elasticsearch plugin descriptor file +# This file must exist as 'plugin-descriptor.properties' or 'stable-plugin-descriptor.properties inside a plugin. +# +## example plugin for "foo" +# +# foo.zip <-- zip file for the plugin, with this structure: +# |____ .jar <-- classes, resources, dependencies +# |____ .jar <-- any number of jars +# |____ plugin-descriptor.properties <-- example contents below: +# +# classname=foo.bar.BazPlugin +# description=My cool plugin +# version=6.0 +# elasticsearch.version=6.0 +# java.version=1.8 +# +## mandatory elements for all plugins: +# +# 'description': simple summary of the plugin +description=${description} +# +# 'version': plugin's version +version=${version} +# +# 'name': the plugin name +name=${name} +# +# 'java.version': version of java the code is built against +# use the system property java.specification.version +# version string must be a sequence of nonnegative decimal integers +# separated by "."'s and may have leading zeros +java.version=${javaVersion} +# +# 'elasticsearch.version': version of elasticsearch compiled against. +# Plugins implementing plugin-api.jar this version only has to match a major version of the ES server +# For all other plugins it has to be the same as ES server version +elasticsearch.version=${elasticsearchVersion} +## optional elements for plugins: +<% if (classname) { %> +# +# 'classname': the name of the class to load, fully-qualified. 
Only applies to +# "isolated" plugins +classname=${classname} +<% } %> +<% if (modulename) { %> +# +# 'modulename': the name of the module to load classname from. Only applies to +# "isolated" plugins. This is optional. Specifying it causes the plugin +# to be loaded as a module. +modulename=${modulename} +<% } %> +<% if (extendedPlugins) { %> +# +# 'extended.plugins': other plugins this plugin extends through SPI +extended.plugins=${extendedPlugins} +<% } %> +<% if (hasNativeController) { %> +# +# 'has.native.controller': whether or not the plugin has a native controller +has.native.controller=${hasNativeController} +<% } %> +<% if (licensed) { %> +# This plugin requires that a license agreement be accepted before installation +licensed=${licensed} +<% } %> +``` + + +## Properties [_properties_2] + +| Element | Type | Description | +| --- | --- | --- | +| `description` | String | simple summary of the plugin | +| `version` | String | plugin’s version | +| `name` | String | the plugin name | +| `classname` | String | the name of the class to load,fully-qualified. | +| `extended.plugins` | String | other plugins this plugin extends throughSPI. | +| `modulename` | String | the name of the module to load classnamefrom. Only applies to "isolated" plugins. This is optional. Specifying it causesthe plugin to be loaded as a module. | +| `java.version` | String | version of java the code is built against.Use the system property `java.specification.version`. Version string must be asequence of nonnegative decimal integers separated by "."'s and may have leadingzeros. | +| `elasticsearch.version` | String | version of {{es}} compiled against. | + diff --git a/docs/extend/plugin-descriptor-file-stable.md b/docs/extend/plugin-descriptor-file-stable.md new file mode 100644 index 0000000000000..58e2e1440a858 --- /dev/null +++ b/docs/extend/plugin-descriptor-file-stable.md @@ -0,0 +1,89 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/plugin-descriptor-file-stable.html +--- + +# The plugin descriptor file for stable plugins [plugin-descriptor-file-stable] + +The stable plugin descriptor file is a Java properties file called `stable-plugin-descriptor.properties` that describes the plugin. The file is automatically created if you are using {{es}}'s Gradle build system. If you’re not using the gradle plugin, you can create it manually using the following template. + +```yaml +# Elasticsearch plugin descriptor file +# This file must exist as 'plugin-descriptor.properties' or 'stable-plugin-descriptor.properties inside a plugin. +# +## example plugin for "foo" +# +# foo.zip <-- zip file for the plugin, with this structure: +# |____ .jar <-- classes, resources, dependencies +# |____ .jar <-- any number of jars +# |____ plugin-descriptor.properties <-- example contents below: +# +# classname=foo.bar.BazPlugin +# description=My cool plugin +# version=6.0 +# elasticsearch.version=6.0 +# java.version=1.8 +# +## mandatory elements for all plugins: +# +# 'description': simple summary of the plugin +description=${description} +# +# 'version': plugin's version +version=${version} +# +# 'name': the plugin name +name=${name} +# +# 'java.version': version of java the code is built against +# use the system property java.specification.version +# version string must be a sequence of nonnegative decimal integers +# separated by "."'s and may have leading zeros +java.version=${javaVersion} +# +# 'elasticsearch.version': version of elasticsearch compiled against. 
+# Plugins implementing plugin-api.jar this version only has to match a major version of the ES server +# For all other plugins it has to be the same as ES server version +elasticsearch.version=${elasticsearchVersion} +## optional elements for plugins: +<% if (classname) { %> +# +# 'classname': the name of the class to load, fully-qualified. Only applies to +# "isolated" plugins +classname=${classname} +<% } %> +<% if (modulename) { %> +# +# 'modulename': the name of the module to load classname from. Only applies to +# "isolated" plugins. This is optional. Specifying it causes the plugin +# to be loaded as a module. +modulename=${modulename} +<% } %> +<% if (extendedPlugins) { %> +# +# 'extended.plugins': other plugins this plugin extends through SPI +extended.plugins=${extendedPlugins} +<% } %> +<% if (hasNativeController) { %> +# +# 'has.native.controller': whether or not the plugin has a native controller +has.native.controller=${hasNativeController} +<% } %> +<% if (licensed) { %> +# This plugin requires that a license agreement be accepted before installation +licensed=${licensed} +<% } %> +``` + + +## Properties [_properties] + +| Element | Type | Description | +| --- | --- | --- | +| `description` | String | simple summary of the plugin | +| `version` | String | plugin’s version | +| `name` | String | the plugin name | +| `classname` | String | this property is for classic plugins. Donot include this property for stable plugins. | +| `java.version` | String | version of java the code is built against.Use the system property `java.specification.version`. Version string must be asequence of nonnegative decimal integers separated by "."'s and may have leadingzeros. | +| `elasticsearch.version` | String | version of {{es}} compiled against. | + diff --git a/docs/extend/toc.yml b/docs/extend/toc.yml new file mode 100644 index 0000000000000..7b18e7b6aaa17 --- /dev/null +++ b/docs/extend/toc.yml @@ -0,0 +1,9 @@ +toc: + - file: index.md + - file: creating-stable-plugins.md + children: + - file: plugin-descriptor-file-stable.md + - file: example-text-analysis-plugin.md + - file: creating-classic-plugins.md + children: + - file: plugin-descriptor-file-classic.md \ No newline at end of file diff --git a/docs/reference/images/Exponential.png b/docs/images/Exponential.png similarity index 100% rename from docs/reference/images/Exponential.png rename to docs/images/Exponential.png diff --git a/docs/reference/images/Gaussian.png b/docs/images/Gaussian.png similarity index 100% rename from docs/reference/images/Gaussian.png rename to docs/images/Gaussian.png diff --git a/docs/reference/images/Linear.png b/docs/images/Linear.png similarity index 100% rename from docs/reference/images/Linear.png rename to docs/images/Linear.png diff --git a/docs/reference/esql/functions/signature/abs.svg b/docs/images/abs.svg similarity index 100% rename from docs/reference/esql/functions/signature/abs.svg rename to docs/images/abs.svg diff --git a/docs/reference/images/rare_terms/accuracy_0001.png b/docs/images/accuracy_0001.png similarity index 100% rename from docs/reference/images/rare_terms/accuracy_0001.png rename to docs/images/accuracy_0001.png diff --git a/docs/reference/images/rare_terms/accuracy_001.png b/docs/images/accuracy_001.png similarity index 100% rename from docs/reference/images/rare_terms/accuracy_001.png rename to docs/images/accuracy_001.png diff --git a/docs/reference/images/rare_terms/accuracy_01.png b/docs/images/accuracy_01.png similarity index 100% rename from 
docs/reference/images/rare_terms/accuracy_01.png rename to docs/images/accuracy_01.png diff --git a/docs/reference/esql/functions/signature/acos.svg b/docs/images/acos.svg similarity index 100% rename from docs/reference/esql/functions/signature/acos.svg rename to docs/images/acos.svg diff --git a/docs/reference/esql/functions/signature/add.svg b/docs/images/add.svg similarity index 100% rename from docs/reference/esql/functions/signature/add.svg rename to docs/images/add.svg diff --git a/docs/reference/esql/functions/signature/asin.svg b/docs/images/asin.svg similarity index 100% rename from docs/reference/esql/functions/signature/asin.svg rename to docs/images/asin.svg diff --git a/docs/reference/esql/functions/signature/atan.svg b/docs/images/atan.svg similarity index 100% rename from docs/reference/esql/functions/signature/atan.svg rename to docs/images/atan.svg diff --git a/docs/reference/esql/functions/signature/atan2.svg b/docs/images/atan2.svg similarity index 100% rename from docs/reference/esql/functions/signature/atan2.svg rename to docs/images/atan2.svg diff --git a/docs/images/avg.svg b/docs/images/avg.svg new file mode 100644 index 0000000000000..e20d7dd3ff971 --- /dev/null +++ b/docs/images/avg.svg @@ -0,0 +1 @@ +AVG(number) \ No newline at end of file diff --git a/docs/reference/esql/functions/signature/bit_length.svg b/docs/images/bit_length.svg similarity index 100% rename from docs/reference/esql/functions/signature/bit_length.svg rename to docs/images/bit_length.svg diff --git a/docs/reference/esql/functions/signature/bucket.svg b/docs/images/bucket.svg similarity index 100% rename from docs/reference/esql/functions/signature/bucket.svg rename to docs/images/bucket.svg diff --git a/docs/reference/esql/functions/signature/byte_length.svg b/docs/images/byte_length.svg similarity index 100% rename from docs/reference/esql/functions/signature/byte_length.svg rename to docs/images/byte_length.svg diff --git a/docs/reference/images/cardinality_error.png b/docs/images/cardinality_error.png similarity index 100% rename from docs/reference/images/cardinality_error.png rename to docs/images/cardinality_error.png diff --git a/docs/reference/esql/functions/signature/case.svg b/docs/images/case.svg similarity index 100% rename from docs/reference/esql/functions/signature/case.svg rename to docs/images/case.svg diff --git a/docs/reference/esql/functions/signature/categorize.svg b/docs/images/categorize.svg similarity index 100% rename from docs/reference/esql/functions/signature/categorize.svg rename to docs/images/categorize.svg diff --git a/docs/reference/esql/functions/signature/cbrt.svg b/docs/images/cbrt.svg similarity index 100% rename from docs/reference/esql/functions/signature/cbrt.svg rename to docs/images/cbrt.svg diff --git a/docs/reference/esql/functions/signature/ceil.svg b/docs/images/ceil.svg similarity index 100% rename from docs/reference/esql/functions/signature/ceil.svg rename to docs/images/ceil.svg diff --git a/docs/reference/esql/functions/signature/cidr_match.svg b/docs/images/cidr_match.svg similarity index 100% rename from docs/reference/esql/functions/signature/cidr_match.svg rename to docs/images/cidr_match.svg diff --git a/docs/reference/esql/functions/signature/coalesce.svg b/docs/images/coalesce.svg similarity index 100% rename from docs/reference/esql/functions/signature/coalesce.svg rename to docs/images/coalesce.svg diff --git a/docs/reference/esql/functions/signature/concat.svg b/docs/images/concat.svg similarity index 100% rename from 
docs/reference/esql/functions/signature/concat.svg rename to docs/images/concat.svg diff --git a/docs/reference/esql/functions/signature/cos.svg b/docs/images/cos.svg similarity index 100% rename from docs/reference/esql/functions/signature/cos.svg rename to docs/images/cos.svg diff --git a/docs/reference/esql/functions/signature/cosh.svg b/docs/images/cosh.svg similarity index 100% rename from docs/reference/esql/functions/signature/cosh.svg rename to docs/images/cosh.svg diff --git a/docs/reference/esql/functions/signature/count.svg b/docs/images/count.svg similarity index 100% rename from docs/reference/esql/functions/signature/count.svg rename to docs/images/count.svg diff --git a/docs/reference/esql/functions/signature/count_distinct.svg b/docs/images/count_distinct.svg similarity index 100% rename from docs/reference/esql/functions/signature/count_distinct.svg rename to docs/images/count_distinct.svg diff --git a/docs/reference/images/data-streams/create-index-template.png b/docs/images/create-index-template.png similarity index 100% rename from docs/reference/images/data-streams/create-index-template.png rename to docs/images/create-index-template.png diff --git a/docs/reference/esql/functions/signature/date_diff.svg b/docs/images/date_diff.svg similarity index 100% rename from docs/reference/esql/functions/signature/date_diff.svg rename to docs/images/date_diff.svg diff --git a/docs/reference/esql/functions/signature/date_extract.svg b/docs/images/date_extract.svg similarity index 100% rename from docs/reference/esql/functions/signature/date_extract.svg rename to docs/images/date_extract.svg diff --git a/docs/reference/esql/functions/signature/date_format.svg b/docs/images/date_format.svg similarity index 100% rename from docs/reference/esql/functions/signature/date_format.svg rename to docs/images/date_format.svg diff --git a/docs/reference/esql/functions/signature/date_parse.svg b/docs/images/date_parse.svg similarity index 100% rename from docs/reference/esql/functions/signature/date_parse.svg rename to docs/images/date_parse.svg diff --git a/docs/reference/esql/functions/signature/date_trunc.svg b/docs/images/date_trunc.svg similarity index 100% rename from docs/reference/esql/functions/signature/date_trunc.svg rename to docs/images/date_trunc.svg diff --git a/docs/reference/images/decay_2d.png b/docs/images/decay_2d.png similarity index 100% rename from docs/reference/images/decay_2d.png rename to docs/images/decay_2d.png diff --git a/docs/reference/esql/functions/signature/div.svg b/docs/images/div.svg similarity index 100% rename from docs/reference/esql/functions/signature/div.svg rename to docs/images/div.svg diff --git a/docs/reference/connector/docs/images/dls-api-key-workflow.png b/docs/images/dls-api-key-workflow.png similarity index 100% rename from docs/reference/connector/docs/images/dls-api-key-workflow.png rename to docs/images/dls-api-key-workflow.png diff --git a/docs/reference/images/pipeline_serialdiff/dow.png b/docs/images/dow.png similarity index 100% rename from docs/reference/images/pipeline_serialdiff/dow.png rename to docs/images/dow.png diff --git a/docs/reference/esql/functions/signature/e.svg b/docs/images/e.svg similarity index 100% rename from docs/reference/esql/functions/signature/e.svg rename to docs/images/e.svg diff --git a/docs/reference/esql/functions/signature/ends_with.svg b/docs/images/ends_with.svg similarity index 100% rename from docs/reference/esql/functions/signature/ends_with.svg rename to docs/images/ends_with.svg diff --git 
a/docs/reference/esql/functions/signature/equals.svg b/docs/images/equals.svg similarity index 100% rename from docs/reference/esql/functions/signature/equals.svg rename to docs/images/equals.svg diff --git a/docs/reference/images/spatial/error_distance.png b/docs/images/error_distance.png similarity index 100% rename from docs/reference/images/spatial/error_distance.png rename to docs/images/error_distance.png diff --git a/docs/reference/images/esql/esql-enrich-command.png b/docs/images/esql-enrich-command.png similarity index 100% rename from docs/reference/images/esql/esql-enrich-command.png rename to docs/images/esql-enrich-command.png diff --git a/docs/reference/images/esql/esql-enrich-policy.png b/docs/images/esql-enrich-policy.png similarity index 100% rename from docs/reference/images/esql/esql-enrich-policy.png rename to docs/images/esql-enrich-policy.png diff --git a/docs/reference/images/esql/esql-enrich.png b/docs/images/esql-enrich.png similarity index 100% rename from docs/reference/images/esql/esql-enrich.png rename to docs/images/esql-enrich.png diff --git a/docs/reference/esql/functions/signature/exp.svg b/docs/images/exp.svg similarity index 100% rename from docs/reference/esql/functions/signature/exp.svg rename to docs/images/exp.svg diff --git a/docs/reference/connector/docs/images/filtering-general-diagram.png b/docs/images/filtering-general-diagram.png similarity index 100% rename from docs/reference/connector/docs/images/filtering-general-diagram.png rename to docs/images/filtering-general-diagram.png diff --git a/docs/reference/connector/docs/images/filtering-rules-zero-state.png b/docs/images/filtering-rules-zero-state.png similarity index 100% rename from docs/reference/connector/docs/images/filtering-rules-zero-state.png rename to docs/images/filtering-rules-zero-state.png diff --git a/docs/reference/esql/functions/signature/floor.svg b/docs/images/floor.svg similarity index 100% rename from docs/reference/esql/functions/signature/floor.svg rename to docs/images/floor.svg diff --git a/docs/reference/esql/functions/signature/from_base64.svg b/docs/images/from_base64.svg similarity index 100% rename from docs/reference/esql/functions/signature/from_base64.svg rename to docs/images/from_base64.svg diff --git a/docs/reference/images/spatial/geo_line.png b/docs/images/geo_line.png similarity index 100% rename from docs/reference/images/spatial/geo_line.png rename to docs/images/geo_line.png diff --git a/docs/reference/images/spatial/geogrid_h3.png b/docs/images/geogrid_h3.png similarity index 100% rename from docs/reference/images/spatial/geogrid_h3.png rename to docs/images/geogrid_h3.png diff --git a/docs/reference/images/spatial/geogrid_h3_children.png b/docs/images/geogrid_h3_children.png similarity index 100% rename from docs/reference/images/spatial/geogrid_h3_children.png rename to docs/images/geogrid_h3_children.png diff --git a/docs/reference/images/spatial/geogrid_tile.png b/docs/images/geogrid_tile.png similarity index 100% rename from docs/reference/images/spatial/geogrid_tile.png rename to docs/images/geogrid_tile.png diff --git a/docs/reference/images/spatial/geoshape_grid.png b/docs/images/geoshape_grid.png similarity index 100% rename from docs/reference/images/spatial/geoshape_grid.png rename to docs/images/geoshape_grid.png diff --git a/docs/reference/images/spatial/geoshape_hexgrid.png b/docs/images/geoshape_hexgrid.png similarity index 100% rename from docs/reference/images/spatial/geoshape_hexgrid.png rename to docs/images/geoshape_hexgrid.png 
diff --git a/docs/reference/esql/functions/signature/greater_than.svg b/docs/images/greater_than.svg similarity index 100% rename from docs/reference/esql/functions/signature/greater_than.svg rename to docs/images/greater_than.svg diff --git a/docs/reference/esql/functions/signature/greater_than_or_equal.svg b/docs/images/greater_than_or_equal.svg similarity index 100% rename from docs/reference/esql/functions/signature/greater_than_or_equal.svg rename to docs/images/greater_than_or_equal.svg diff --git a/docs/reference/esql/functions/signature/greatest.svg b/docs/images/greatest.svg similarity index 100% rename from docs/reference/esql/functions/signature/greatest.svg rename to docs/images/greatest.svg diff --git a/docs/reference/esql/functions/signature/hash.svg b/docs/images/hash.svg similarity index 100% rename from docs/reference/esql/functions/signature/hash.svg rename to docs/images/hash.svg diff --git a/docs/reference/connector/docs/images/hybrid-architecture.png b/docs/images/hybrid-architecture.png similarity index 100% rename from docs/reference/connector/docs/images/hybrid-architecture.png rename to docs/images/hybrid-architecture.png diff --git a/docs/reference/esql/functions/signature/hypot.svg b/docs/images/hypot.svg similarity index 100% rename from docs/reference/esql/functions/signature/hypot.svg rename to docs/images/hypot.svg diff --git a/docs/reference/esql/functions/signature/ip_prefix.svg b/docs/images/ip_prefix.svg similarity index 100% rename from docs/reference/esql/functions/signature/ip_prefix.svg rename to docs/images/ip_prefix.svg diff --git a/docs/reference/images/spatial/kodiak_geo_line_simplified.png b/docs/images/kodiak_geo_line_simplified.png similarity index 100% rename from docs/reference/images/spatial/kodiak_geo_line_simplified.png rename to docs/images/kodiak_geo_line_simplified.png diff --git a/docs/reference/images/spatial/kodiak_geo_line_truncated.png b/docs/images/kodiak_geo_line_truncated.png similarity index 100% rename from docs/reference/images/spatial/kodiak_geo_line_truncated.png rename to docs/images/kodiak_geo_line_truncated.png diff --git a/docs/reference/esql/functions/signature/kql.svg b/docs/images/kql.svg similarity index 100% rename from docs/reference/esql/functions/signature/kql.svg rename to docs/images/kql.svg diff --git a/docs/reference/images/lambda.png b/docs/images/lambda.png similarity index 100% rename from docs/reference/images/lambda.png rename to docs/images/lambda.png diff --git a/docs/reference/images/lambda_calc.png b/docs/images/lambda_calc.png similarity index 100% rename from docs/reference/images/lambda_calc.png rename to docs/images/lambda_calc.png diff --git a/docs/reference/esql/functions/signature/least.svg b/docs/images/least.svg similarity index 100% rename from docs/reference/esql/functions/signature/least.svg rename to docs/images/least.svg diff --git a/docs/reference/esql/functions/signature/left.svg b/docs/images/left.svg similarity index 100% rename from docs/reference/esql/functions/signature/left.svg rename to docs/images/left.svg diff --git a/docs/reference/images/pipeline_serialdiff/lemmings.png b/docs/images/lemmings.png similarity index 100% rename from docs/reference/images/pipeline_serialdiff/lemmings.png rename to docs/images/lemmings.png diff --git a/docs/reference/esql/functions/signature/length.svg b/docs/images/length.svg similarity index 100% rename from docs/reference/esql/functions/signature/length.svg rename to docs/images/length.svg diff --git 
a/docs/reference/esql/functions/signature/less_than.svg b/docs/images/less_than.svg similarity index 100% rename from docs/reference/esql/functions/signature/less_than.svg rename to docs/images/less_than.svg diff --git a/docs/reference/esql/functions/signature/less_than_or_equal.svg b/docs/images/less_than_or_equal.svg similarity index 100% rename from docs/reference/esql/functions/signature/less_than_or_equal.svg rename to docs/images/less_than_or_equal.svg diff --git a/docs/reference/esql/functions/signature/locate.svg b/docs/images/locate.svg similarity index 100% rename from docs/reference/esql/functions/signature/locate.svg rename to docs/images/locate.svg diff --git a/docs/reference/esql/functions/signature/log.svg b/docs/images/log.svg similarity index 100% rename from docs/reference/esql/functions/signature/log.svg rename to docs/images/log.svg diff --git a/docs/reference/esql/functions/signature/log10.svg b/docs/images/log10.svg similarity index 100% rename from docs/reference/esql/functions/signature/log10.svg rename to docs/images/log10.svg diff --git a/docs/reference/esql/functions/signature/ltrim.svg b/docs/images/ltrim.svg similarity index 100% rename from docs/reference/esql/functions/signature/ltrim.svg rename to docs/images/ltrim.svg diff --git a/docs/reference/esql/functions/signature/match.svg b/docs/images/match.svg similarity index 100% rename from docs/reference/esql/functions/signature/match.svg rename to docs/images/match.svg diff --git a/docs/reference/esql/functions/signature/match_operator.svg b/docs/images/match_operator.svg similarity index 100% rename from docs/reference/esql/functions/signature/match_operator.svg rename to docs/images/match_operator.svg diff --git a/docs/reference/esql/functions/signature/max.svg b/docs/images/max.svg similarity index 100% rename from docs/reference/esql/functions/signature/max.svg rename to docs/images/max.svg diff --git a/docs/reference/esql/functions/signature/md5.svg b/docs/images/md5.svg similarity index 100% rename from docs/reference/esql/functions/signature/md5.svg rename to docs/images/md5.svg diff --git a/docs/reference/esql/functions/signature/median.svg b/docs/images/median.svg similarity index 100% rename from docs/reference/esql/functions/signature/median.svg rename to docs/images/median.svg diff --git a/docs/reference/esql/functions/signature/median_absolute_deviation.svg b/docs/images/median_absolute_deviation.svg similarity index 100% rename from docs/reference/esql/functions/signature/median_absolute_deviation.svg rename to docs/images/median_absolute_deviation.svg diff --git a/docs/reference/images/rare_terms/memory.png b/docs/images/memory.png similarity index 100% rename from docs/reference/images/rare_terms/memory.png rename to docs/images/memory.png diff --git a/docs/reference/esql/functions/signature/min.svg b/docs/images/min.svg similarity index 100% rename from docs/reference/esql/functions/signature/min.svg rename to docs/images/min.svg diff --git a/docs/reference/esql/functions/signature/mod.svg b/docs/images/mod.svg similarity index 100% rename from docs/reference/esql/functions/signature/mod.svg rename to docs/images/mod.svg diff --git a/docs/reference/connector/docs/images/mongodb-connector-config.png b/docs/images/mongodb-connector-config.png similarity index 100% rename from docs/reference/connector/docs/images/mongodb-connector-config.png rename to docs/images/mongodb-connector-config.png diff --git a/docs/reference/connector/docs/images/mongodb-load-sample-data.png 
b/docs/images/mongodb-load-sample-data.png similarity index 100% rename from docs/reference/connector/docs/images/mongodb-load-sample-data.png rename to docs/images/mongodb-load-sample-data.png diff --git a/docs/reference/connector/docs/images/mongodb-sample-document.png b/docs/images/mongodb-sample-document.png similarity index 100% rename from docs/reference/connector/docs/images/mongodb-sample-document.png rename to docs/images/mongodb-sample-document.png diff --git a/docs/reference/esql/functions/signature/mul.svg b/docs/images/mul.svg similarity index 100% rename from docs/reference/esql/functions/signature/mul.svg rename to docs/images/mul.svg diff --git a/docs/reference/esql/functions/signature/mv_append.svg b/docs/images/mv_append.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_append.svg rename to docs/images/mv_append.svg diff --git a/docs/reference/esql/functions/signature/mv_avg.svg b/docs/images/mv_avg.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_avg.svg rename to docs/images/mv_avg.svg diff --git a/docs/reference/esql/functions/signature/mv_concat.svg b/docs/images/mv_concat.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_concat.svg rename to docs/images/mv_concat.svg diff --git a/docs/reference/esql/functions/signature/mv_count.svg b/docs/images/mv_count.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_count.svg rename to docs/images/mv_count.svg diff --git a/docs/reference/esql/functions/signature/mv_dedupe.svg b/docs/images/mv_dedupe.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_dedupe.svg rename to docs/images/mv_dedupe.svg diff --git a/docs/reference/esql/functions/signature/mv_first.svg b/docs/images/mv_first.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_first.svg rename to docs/images/mv_first.svg diff --git a/docs/reference/esql/functions/signature/mv_last.svg b/docs/images/mv_last.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_last.svg rename to docs/images/mv_last.svg diff --git a/docs/reference/esql/functions/signature/mv_max.svg b/docs/images/mv_max.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_max.svg rename to docs/images/mv_max.svg diff --git a/docs/reference/esql/functions/signature/mv_median.svg b/docs/images/mv_median.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_median.svg rename to docs/images/mv_median.svg diff --git a/docs/reference/esql/functions/signature/mv_median_absolute_deviation.svg b/docs/images/mv_median_absolute_deviation.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_median_absolute_deviation.svg rename to docs/images/mv_median_absolute_deviation.svg diff --git a/docs/reference/esql/functions/signature/mv_min.svg b/docs/images/mv_min.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_min.svg rename to docs/images/mv_min.svg diff --git a/docs/reference/esql/functions/signature/mv_percentile.svg b/docs/images/mv_percentile.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_percentile.svg rename to docs/images/mv_percentile.svg diff --git a/docs/reference/esql/functions/signature/mv_pseries_weighted_sum.svg b/docs/images/mv_pseries_weighted_sum.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_pseries_weighted_sum.svg 
rename to docs/images/mv_pseries_weighted_sum.svg diff --git a/docs/reference/esql/functions/signature/mv_slice.svg b/docs/images/mv_slice.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_slice.svg rename to docs/images/mv_slice.svg diff --git a/docs/reference/esql/functions/signature/mv_sort.svg b/docs/images/mv_sort.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_sort.svg rename to docs/images/mv_sort.svg diff --git a/docs/reference/esql/functions/signature/mv_sum.svg b/docs/images/mv_sum.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_sum.svg rename to docs/images/mv_sum.svg diff --git a/docs/reference/esql/functions/signature/mv_zip.svg b/docs/images/mv_zip.svg similarity index 100% rename from docs/reference/esql/functions/signature/mv_zip.svg rename to docs/images/mv_zip.svg diff --git a/docs/reference/esql/functions/signature/neg.svg b/docs/images/neg.svg similarity index 100% rename from docs/reference/esql/functions/signature/neg.svg rename to docs/images/neg.svg diff --git a/docs/reference/esql/functions/signature/not_equals.svg b/docs/images/not_equals.svg similarity index 100% rename from docs/reference/esql/functions/signature/not_equals.svg rename to docs/images/not_equals.svg diff --git a/docs/reference/esql/functions/signature/now.svg b/docs/images/now.svg similarity index 100% rename from docs/reference/esql/functions/signature/now.svg rename to docs/images/now.svg diff --git a/docs/reference/esql/functions/signature/percentile.svg b/docs/images/percentile.svg similarity index 100% rename from docs/reference/esql/functions/signature/percentile.svg rename to docs/images/percentile.svg diff --git a/docs/reference/images/percentiles_error.png b/docs/images/percentiles_error.png similarity index 100% rename from docs/reference/images/percentiles_error.png rename to docs/images/percentiles_error.png diff --git a/docs/reference/esql/functions/signature/pi.svg b/docs/images/pi.svg similarity index 100% rename from docs/reference/esql/functions/signature/pi.svg rename to docs/images/pi.svg diff --git a/docs/reference/connector/docs/images/pipelines-extraction-sync-rules.png b/docs/images/pipelines-extraction-sync-rules.png similarity index 100% rename from docs/reference/connector/docs/images/pipelines-extraction-sync-rules.png rename to docs/images/pipelines-extraction-sync-rules.png diff --git a/docs/reference/esql/functions/signature/pow.svg b/docs/images/pow.svg similarity index 100% rename from docs/reference/esql/functions/signature/pow.svg rename to docs/images/pow.svg diff --git a/docs/reference/images/esql/processing-command.svg b/docs/images/processing-command.svg similarity index 100% rename from docs/reference/images/esql/processing-command.svg rename to docs/images/processing-command.svg diff --git a/docs/reference/esql/functions/signature/qstr.svg b/docs/images/qstr.svg similarity index 100% rename from docs/reference/esql/functions/signature/qstr.svg rename to docs/images/qstr.svg diff --git a/docs/reference/images/aggregations/random-sampler-agg-graph.png b/docs/images/random-sampler-agg-graph.png similarity index 100% rename from docs/reference/images/aggregations/random-sampler-agg-graph.png rename to docs/images/random-sampler-agg-graph.png diff --git a/docs/reference/images/aggregations/relative-error-vs-doc-count.png b/docs/images/relative-error-vs-doc-count.png similarity index 100% rename from docs/reference/images/aggregations/relative-error-vs-doc-count.png 
rename to docs/images/relative-error-vs-doc-count.png diff --git a/docs/reference/esql/functions/signature/repeat.svg b/docs/images/repeat.svg similarity index 100% rename from docs/reference/esql/functions/signature/repeat.svg rename to docs/images/repeat.svg diff --git a/docs/reference/esql/functions/signature/replace.svg b/docs/images/replace.svg similarity index 100% rename from docs/reference/esql/functions/signature/replace.svg rename to docs/images/replace.svg diff --git a/docs/reference/esql/functions/signature/reverse.svg b/docs/images/reverse.svg similarity index 100% rename from docs/reference/esql/functions/signature/reverse.svg rename to docs/images/reverse.svg diff --git a/docs/reference/esql/functions/signature/right.svg b/docs/images/right.svg similarity index 100% rename from docs/reference/esql/functions/signature/right.svg rename to docs/images/right.svg diff --git a/docs/reference/esql/functions/signature/round.svg b/docs/images/round.svg similarity index 100% rename from docs/reference/esql/functions/signature/round.svg rename to docs/images/round.svg diff --git a/docs/reference/esql/functions/signature/rtrim.svg b/docs/images/rtrim.svg similarity index 100% rename from docs/reference/esql/functions/signature/rtrim.svg rename to docs/images/rtrim.svg diff --git a/docs/reference/images/s_calc.png b/docs/images/s_calc.png similarity index 100% rename from docs/reference/images/s_calc.png rename to docs/images/s_calc.png diff --git a/docs/reference/connector/docs/images/self-managed-architecture.png b/docs/images/self-managed-architecture.png similarity index 100% rename from docs/reference/connector/docs/images/self-managed-architecture.png rename to docs/images/self-managed-architecture.png diff --git a/docs/reference/images/eql/separate-state-machines.svg b/docs/images/separate-state-machines.svg similarity index 100% rename from docs/reference/images/eql/separate-state-machines.svg rename to docs/images/separate-state-machines.svg diff --git a/docs/reference/images/eql/sequence-state-machine.svg b/docs/images/sequence-state-machine.svg similarity index 100% rename from docs/reference/images/eql/sequence-state-machine.svg rename to docs/images/sequence-state-machine.svg diff --git a/docs/reference/esql/functions/signature/sha1.svg b/docs/images/sha1.svg similarity index 100% rename from docs/reference/esql/functions/signature/sha1.svg rename to docs/images/sha1.svg diff --git a/docs/reference/esql/functions/signature/sha256.svg b/docs/images/sha256.svg similarity index 100% rename from docs/reference/esql/functions/signature/sha256.svg rename to docs/images/sha256.svg diff --git a/docs/reference/images/sigma.png b/docs/images/sigma.png similarity index 100% rename from docs/reference/images/sigma.png rename to docs/images/sigma.png diff --git a/docs/reference/images/sigma_calc.png b/docs/images/sigma_calc.png similarity index 100% rename from docs/reference/images/sigma_calc.png rename to docs/images/sigma_calc.png diff --git a/docs/reference/esql/functions/signature/signum.svg b/docs/images/signum.svg similarity index 100% rename from docs/reference/esql/functions/signature/signum.svg rename to docs/images/signum.svg diff --git a/docs/reference/connector/docs/images/simple-rule-greater.png b/docs/images/simple-rule-greater.png similarity index 100% rename from docs/reference/connector/docs/images/simple-rule-greater.png rename to docs/images/simple-rule-greater.png diff --git a/docs/reference/connector/docs/images/simple-rule-regex.png 
b/docs/images/simple-rule-regex.png similarity index 100% rename from docs/reference/connector/docs/images/simple-rule-regex.png rename to docs/images/simple-rule-regex.png diff --git a/docs/reference/esql/functions/signature/sin.svg b/docs/images/sin.svg similarity index 100% rename from docs/reference/esql/functions/signature/sin.svg rename to docs/images/sin.svg diff --git a/docs/reference/esql/functions/signature/sinh.svg b/docs/images/sinh.svg similarity index 100% rename from docs/reference/esql/functions/signature/sinh.svg rename to docs/images/sinh.svg diff --git a/docs/reference/images/esql/source-command.svg b/docs/images/source-command.svg similarity index 100% rename from docs/reference/images/esql/source-command.svg rename to docs/images/source-command.svg diff --git a/docs/reference/esql/functions/signature/space.svg b/docs/images/space.svg similarity index 100% rename from docs/reference/esql/functions/signature/space.svg rename to docs/images/space.svg diff --git a/docs/reference/esql/functions/signature/split.svg b/docs/images/split.svg similarity index 100% rename from docs/reference/esql/functions/signature/split.svg rename to docs/images/split.svg diff --git a/docs/reference/esql/functions/signature/sqrt.svg b/docs/images/sqrt.svg similarity index 100% rename from docs/reference/esql/functions/signature/sqrt.svg rename to docs/images/sqrt.svg diff --git a/docs/reference/esql/functions/signature/st_centroid_agg.svg b/docs/images/st_centroid_agg.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_centroid_agg.svg rename to docs/images/st_centroid_agg.svg diff --git a/docs/reference/esql/functions/signature/st_contains.svg b/docs/images/st_contains.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_contains.svg rename to docs/images/st_contains.svg diff --git a/docs/reference/esql/functions/signature/st_disjoint.svg b/docs/images/st_disjoint.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_disjoint.svg rename to docs/images/st_disjoint.svg diff --git a/docs/reference/esql/functions/signature/st_distance.svg b/docs/images/st_distance.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_distance.svg rename to docs/images/st_distance.svg diff --git a/docs/reference/esql/functions/signature/st_envelope.svg b/docs/images/st_envelope.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_envelope.svg rename to docs/images/st_envelope.svg diff --git a/docs/reference/esql/functions/signature/st_extent_agg.svg b/docs/images/st_extent_agg.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_extent_agg.svg rename to docs/images/st_extent_agg.svg diff --git a/docs/reference/esql/functions/signature/st_intersects.svg b/docs/images/st_intersects.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_intersects.svg rename to docs/images/st_intersects.svg diff --git a/docs/reference/esql/functions/signature/st_within.svg b/docs/images/st_within.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_within.svg rename to docs/images/st_within.svg diff --git a/docs/reference/esql/functions/signature/st_x.svg b/docs/images/st_x.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_x.svg rename to docs/images/st_x.svg diff --git a/docs/reference/esql/functions/signature/st_xmax.svg b/docs/images/st_xmax.svg similarity index 100% rename from 
docs/reference/esql/functions/signature/st_xmax.svg rename to docs/images/st_xmax.svg diff --git a/docs/reference/esql/functions/signature/st_xmin.svg b/docs/images/st_xmin.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_xmin.svg rename to docs/images/st_xmin.svg diff --git a/docs/reference/esql/functions/signature/st_y.svg b/docs/images/st_y.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_y.svg rename to docs/images/st_y.svg diff --git a/docs/reference/esql/functions/signature/st_ymax.svg b/docs/images/st_ymax.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_ymax.svg rename to docs/images/st_ymax.svg diff --git a/docs/reference/esql/functions/signature/st_ymin.svg b/docs/images/st_ymin.svg similarity index 100% rename from docs/reference/esql/functions/signature/st_ymin.svg rename to docs/images/st_ymin.svg diff --git a/docs/reference/esql/functions/signature/starts_with.svg b/docs/images/starts_with.svg similarity index 100% rename from docs/reference/esql/functions/signature/starts_with.svg rename to docs/images/starts_with.svg diff --git a/docs/reference/esql/functions/signature/std_dev.svg b/docs/images/std_dev.svg similarity index 100% rename from docs/reference/esql/functions/signature/std_dev.svg rename to docs/images/std_dev.svg diff --git a/docs/reference/esql/functions/signature/sub.svg b/docs/images/sub.svg similarity index 100% rename from docs/reference/esql/functions/signature/sub.svg rename to docs/images/sub.svg diff --git a/docs/reference/esql/functions/signature/substring.svg b/docs/images/substring.svg similarity index 100% rename from docs/reference/esql/functions/signature/substring.svg rename to docs/images/substring.svg diff --git a/docs/reference/esql/functions/signature/sum.svg b/docs/images/sum.svg similarity index 100% rename from docs/reference/esql/functions/signature/sum.svg rename to docs/images/sum.svg diff --git a/docs/reference/connector/docs/images/sync-rules-advanced-rules-appeared.png b/docs/images/sync-rules-advanced-rules-appeared.png similarity index 100% rename from docs/reference/connector/docs/images/sync-rules-advanced-rules-appeared.png rename to docs/images/sync-rules-advanced-rules-appeared.png diff --git a/docs/reference/connector/docs/images/sync-rules-applied-rules-during-sync.png b/docs/images/sync-rules-applied-rules-during-sync.png similarity index 100% rename from docs/reference/connector/docs/images/sync-rules-applied-rules-during-sync.png rename to docs/images/sync-rules-applied-rules-during-sync.png diff --git a/docs/reference/connector/docs/images/sync-rules-draft-new-rules.png b/docs/images/sync-rules-draft-new-rules.png similarity index 100% rename from docs/reference/connector/docs/images/sync-rules-draft-new-rules.png rename to docs/images/sync-rules-draft-new-rules.png diff --git a/docs/reference/connector/docs/images/sync-rules-extract-all-at-once.png b/docs/images/sync-rules-extract-all-at-once.png similarity index 100% rename from docs/reference/connector/docs/images/sync-rules-extract-all-at-once.png rename to docs/images/sync-rules-extract-all-at-once.png diff --git a/docs/reference/connector/docs/images/sync-rules-pagination.png b/docs/images/sync-rules-pagination.png similarity index 100% rename from docs/reference/connector/docs/images/sync-rules-pagination.png rename to docs/images/sync-rules-pagination.png diff --git a/docs/reference/connector/docs/images/sync-rules-paste-aggregation-pipeline.png 
b/docs/images/sync-rules-paste-aggregation-pipeline.png similarity index 100% rename from docs/reference/connector/docs/images/sync-rules-paste-aggregation-pipeline.png rename to docs/images/sync-rules-paste-aggregation-pipeline.png diff --git a/docs/reference/connector/docs/images/sync-rules-rules-fulfilling-properties.png b/docs/images/sync-rules-rules-fulfilling-properties.png similarity index 100% rename from docs/reference/connector/docs/images/sync-rules-rules-fulfilling-properties.png rename to docs/images/sync-rules-rules-fulfilling-properties.png diff --git a/docs/reference/connector/docs/images/sync-rules-save-and-validate-draft.png b/docs/images/sync-rules-save-and-validate-draft.png similarity index 100% rename from docs/reference/connector/docs/images/sync-rules-save-and-validate-draft.png rename to docs/images/sync-rules-save-and-validate-draft.png diff --git a/docs/reference/connector/docs/images/sync-rules-time-dimension.png b/docs/images/sync-rules-time-dimension.png similarity index 100% rename from docs/reference/connector/docs/images/sync-rules-time-dimension.png rename to docs/images/sync-rules-time-dimension.png diff --git a/docs/reference/esql/functions/signature/tan.svg b/docs/images/tan.svg similarity index 100% rename from docs/reference/esql/functions/signature/tan.svg rename to docs/images/tan.svg diff --git a/docs/reference/esql/functions/signature/tanh.svg b/docs/images/tanh.svg similarity index 100% rename from docs/reference/esql/functions/signature/tanh.svg rename to docs/images/tanh.svg diff --git a/docs/reference/esql/functions/signature/tau.svg b/docs/images/tau.svg similarity index 100% rename from docs/reference/esql/functions/signature/tau.svg rename to docs/images/tau.svg diff --git a/docs/reference/esql/functions/signature/to_base64.svg b/docs/images/to_base64.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_base64.svg rename to docs/images/to_base64.svg diff --git a/docs/reference/esql/functions/signature/to_boolean.svg b/docs/images/to_boolean.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_boolean.svg rename to docs/images/to_boolean.svg diff --git a/docs/reference/esql/functions/signature/to_cartesianpoint.svg b/docs/images/to_cartesianpoint.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_cartesianpoint.svg rename to docs/images/to_cartesianpoint.svg diff --git a/docs/reference/esql/functions/signature/to_cartesianshape.svg b/docs/images/to_cartesianshape.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_cartesianshape.svg rename to docs/images/to_cartesianshape.svg diff --git a/docs/reference/esql/functions/signature/to_date_nanos.svg b/docs/images/to_date_nanos.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_date_nanos.svg rename to docs/images/to_date_nanos.svg diff --git a/docs/reference/esql/functions/signature/to_dateperiod.svg b/docs/images/to_dateperiod.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_dateperiod.svg rename to docs/images/to_dateperiod.svg diff --git a/docs/reference/esql/functions/signature/to_datetime.svg b/docs/images/to_datetime.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_datetime.svg rename to docs/images/to_datetime.svg diff --git a/docs/reference/esql/functions/signature/to_degrees.svg b/docs/images/to_degrees.svg similarity index 100% rename from 
docs/reference/esql/functions/signature/to_degrees.svg rename to docs/images/to_degrees.svg diff --git a/docs/reference/esql/functions/signature/to_double.svg b/docs/images/to_double.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_double.svg rename to docs/images/to_double.svg diff --git a/docs/reference/esql/functions/signature/to_geopoint.svg b/docs/images/to_geopoint.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_geopoint.svg rename to docs/images/to_geopoint.svg diff --git a/docs/reference/esql/functions/signature/to_geoshape.svg b/docs/images/to_geoshape.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_geoshape.svg rename to docs/images/to_geoshape.svg diff --git a/docs/reference/esql/functions/signature/to_integer.svg b/docs/images/to_integer.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_integer.svg rename to docs/images/to_integer.svg diff --git a/docs/reference/esql/functions/signature/to_ip.svg b/docs/images/to_ip.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_ip.svg rename to docs/images/to_ip.svg diff --git a/docs/reference/esql/functions/signature/to_long.svg b/docs/images/to_long.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_long.svg rename to docs/images/to_long.svg diff --git a/docs/reference/esql/functions/signature/to_lower.svg b/docs/images/to_lower.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_lower.svg rename to docs/images/to_lower.svg diff --git a/docs/reference/esql/functions/signature/to_radians.svg b/docs/images/to_radians.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_radians.svg rename to docs/images/to_radians.svg diff --git a/docs/reference/esql/functions/signature/to_string.svg b/docs/images/to_string.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_string.svg rename to docs/images/to_string.svg diff --git a/docs/reference/esql/functions/signature/to_timeduration.svg b/docs/images/to_timeduration.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_timeduration.svg rename to docs/images/to_timeduration.svg diff --git a/docs/reference/esql/functions/signature/to_unsigned_long.svg b/docs/images/to_unsigned_long.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_unsigned_long.svg rename to docs/images/to_unsigned_long.svg diff --git a/docs/reference/esql/functions/signature/to_upper.svg b/docs/images/to_upper.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_upper.svg rename to docs/images/to_upper.svg diff --git a/docs/reference/esql/functions/signature/to_version.svg b/docs/images/to_version.svg similarity index 100% rename from docs/reference/esql/functions/signature/to_version.svg rename to docs/images/to_version.svg diff --git a/docs/reference/images/analysis/token-graph-basic.svg b/docs/images/token-graph-basic.svg similarity index 100% rename from docs/reference/images/analysis/token-graph-basic.svg rename to docs/images/token-graph-basic.svg diff --git a/docs/reference/images/analysis/token-graph-dns-invalid-ex.svg b/docs/images/token-graph-dns-invalid-ex.svg similarity index 100% rename from docs/reference/images/analysis/token-graph-dns-invalid-ex.svg rename to docs/images/token-graph-dns-invalid-ex.svg diff --git 
a/docs/reference/images/analysis/token-graph-dns-synonym-ex.svg b/docs/images/token-graph-dns-synonym-ex.svg similarity index 100% rename from docs/reference/images/analysis/token-graph-dns-synonym-ex.svg rename to docs/images/token-graph-dns-synonym-ex.svg diff --git a/docs/reference/images/analysis/token-graph-wd.svg b/docs/images/token-graph-wd.svg similarity index 100% rename from docs/reference/images/analysis/token-graph-wd.svg rename to docs/images/token-graph-wd.svg diff --git a/docs/reference/images/analysis/token-graph-wdg.svg b/docs/images/token-graph-wdg.svg similarity index 100% rename from docs/reference/images/analysis/token-graph-wdg.svg rename to docs/images/token-graph-wdg.svg diff --git a/docs/reference/esql/functions/signature/top.svg b/docs/images/top.svg similarity index 100% rename from docs/reference/esql/functions/signature/top.svg rename to docs/images/top.svg diff --git a/docs/reference/esql/functions/signature/trim.svg b/docs/images/trim.svg similarity index 100% rename from docs/reference/esql/functions/signature/trim.svg rename to docs/images/trim.svg diff --git a/docs/reference/images/esql/unstructured-data.png b/docs/images/unstructured-data.png similarity index 100% rename from docs/reference/images/esql/unstructured-data.png rename to docs/images/unstructured-data.png diff --git a/docs/reference/connector/docs/images/use-a-connector-workflow.png b/docs/images/use-a-connector-workflow.png similarity index 100% rename from docs/reference/connector/docs/images/use-a-connector-workflow.png rename to docs/images/use-a-connector-workflow.png diff --git a/docs/reference/esql/functions/signature/values.svg b/docs/images/values.svg similarity index 100% rename from docs/reference/esql/functions/signature/values.svg rename to docs/images/values.svg diff --git a/docs/images/weighted_avg.svg b/docs/images/weighted_avg.svg new file mode 100644 index 0000000000000..cb6c448609f08 --- /dev/null +++ b/docs/images/weighted_avg.svg @@ -0,0 +1 @@ +WEIGHTED_AVG(number,weight) \ No newline at end of file diff --git a/docs/painless/index.asciidoc b/docs/painless/index.asciidoc deleted file mode 100644 index c41899bbd98da..0000000000000 --- a/docs/painless/index.asciidoc +++ /dev/null @@ -1,12 +0,0 @@ -[[painless]] -= Painless Scripting Language - -include::../Versions.asciidoc[] - -include::painless-guide.asciidoc[] - -include::painless-lang-spec.asciidoc[] - -include::painless-contexts.asciidoc[] - -include::painless-api-reference.asciidoc[] \ No newline at end of file diff --git a/docs/painless/painless-api-reference.asciidoc b/docs/painless/painless-api-reference.asciidoc deleted file mode 100644 index 4ae770266c207..0000000000000 --- a/docs/painless/painless-api-reference.asciidoc +++ /dev/null @@ -1,11 +0,0 @@ -[[painless-api-reference]] -== Painless API Reference - -Painless has a strict list of allowed methods and classes per context to -ensure all Painless scripts are secure. Most of these methods are -exposed directly from the Java Runtime Environment (JRE) while others -are part of Elasticsearch or Painless itself. Below is a list of the available -APIs per context. The shared API is available to all contexts, while the -specialized API available differs between contexts. 
- -include::painless-api-reference/index.asciidoc[] diff --git a/docs/painless/painless-api-reference/index.asciidoc b/docs/painless/painless-api-reference/index.asciidoc deleted file mode 100644 index d698465610396..0000000000000 --- a/docs/painless/painless-api-reference/index.asciidoc +++ /dev/null @@ -1,61 +0,0 @@ -// This file is auto-generated. Do not edit. - -[cols="<3,^3,^3"] -|==== -|Aggregation Selector | <> | <> -|Aggs | <> | <> -|Aggs Combine | <> | <> -|Aggs Init | <> | <> -|Aggs Map | <> | <> -|Aggs Reduce | <> | <> -|Analysis | <> | <> -|Bucket Aggregation | <> | <> -|Field | <> | <> -|Filter | <> | <> -|Ingest | <> | <> -|Interval | <> | <> -|Moving Function | <> | <> -|Number Sort | <> | <> -|Painless Test | <> | <> -|Processor Conditional | <> | <> -|Score | <> | <> -|Script Heuristic | <> | <> -|Similarity | <> | <> -|Similarity Weight | <> | <> -|String Sort | <> | <> -|Template | <> | <> -|Terms Set | <> | <> -|Update | <> | <> -|Watcher Condition | <> | <> -|Watcher Transform | <> | <> -|Xpack Template | <> | <> -|==== - -include::painless-api-reference-shared/index.asciidoc[] -include::painless-api-reference-aggregation-selector/index.asciidoc[] -include::painless-api-reference-aggs/index.asciidoc[] -include::painless-api-reference-aggs-combine/index.asciidoc[] -include::painless-api-reference-aggs-init/index.asciidoc[] -include::painless-api-reference-aggs-map/index.asciidoc[] -include::painless-api-reference-aggs-reduce/index.asciidoc[] -include::painless-api-reference-analysis/index.asciidoc[] -include::painless-api-reference-bucket-aggregation/index.asciidoc[] -include::painless-api-reference-field/index.asciidoc[] -include::painless-api-reference-filter/index.asciidoc[] -include::painless-api-reference-ingest/index.asciidoc[] -include::painless-api-reference-interval/index.asciidoc[] -include::painless-api-reference-moving-function/index.asciidoc[] -include::painless-api-reference-number-sort/index.asciidoc[] -include::painless-api-reference-painless-test/index.asciidoc[] -include::painless-api-reference-processor-conditional/index.asciidoc[] -include::painless-api-reference-score/index.asciidoc[] -include::painless-api-reference-script-heuristic/index.asciidoc[] -include::painless-api-reference-similarity/index.asciidoc[] -include::painless-api-reference-similarity-weight/index.asciidoc[] -include::painless-api-reference-string-sort/index.asciidoc[] -include::painless-api-reference-template/index.asciidoc[] -include::painless-api-reference-terms-set/index.asciidoc[] -include::painless-api-reference-update/index.asciidoc[] -include::painless-api-reference-watcher-condition/index.asciidoc[] -include::painless-api-reference-watcher-transform/index.asciidoc[] -include::painless-api-reference-xpack-template/index.asciidoc[] diff --git a/docs/painless/painless-api-reference/painless-api-reference-aggregation-selector/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-aggregation-selector/index.asciidoc deleted file mode 100644 index 3a82dc9536a36..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-aggregation-selector/index.asciidoc +++ /dev/null @@ -1,31 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-aggregation-selector]] -=== Aggregation Selector API - -The following specialized API is available in the Aggregation Selector context. - -* See the <> for further API available in all contexts. 
- -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -==== org.elasticsearch.xpack.sql.expression.literal.geo -<> - -* <> - -==== org.elasticsearch.xpack.sql.expression.literal.interval -<> - -* <> -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-aggregation-selector/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-aggregation-selector/packages.asciidoc deleted file mode 100644 index bf87efc31236c..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-aggregation-selector/packages.asciidoc +++ /dev/null @@ -1,91 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-aggregation-selector-java-lang"] -=== Aggregation Selector API for package java.lang -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-aggregation-selector-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void 
{java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - -[role="exclude",id="painless-api-reference-aggregation-selector-org-elasticsearch-xpack-sql-expression-literal-geo"] -=== Aggregation Selector API for package org.elasticsearch.xpack.sql.expression.literal.geo -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-aggregation-selector-GeoShape]] -==== GeoShape -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-aggregation-selector-org-elasticsearch-xpack-sql-expression-literal-interval"] -=== Aggregation Selector API for package org.elasticsearch.xpack.sql.expression.literal.interval -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-aggregation-selector-IntervalDayTime]] -==== IntervalDayTime -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-aggregation-selector-IntervalYearMonth]] -==== IntervalYearMonth -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-aggs-combine/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-aggs-combine/index.asciidoc deleted file mode 100644 index 420797f80ed1d..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-aggs-combine/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-aggs-combine]] -=== Aggs Combine API - -The following specialized API is available in the Aggs Combine context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-aggs-combine/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-aggs-combine/packages.asciidoc deleted file mode 100644 index 273ff65e45d60..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-aggs-combine/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-aggs-combine-java-lang"] -=== Aggs Combine API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-aggs-combine-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-aggs-init/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-aggs-init/index.asciidoc deleted file mode 100644 index 6fcf22ba13d34..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-aggs-init/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-aggs-init]] -=== Aggs Init API - -The following specialized API is available in the Aggs Init context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-aggs-init/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-aggs-init/packages.asciidoc deleted file mode 100644 index c54209d983592..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-aggs-init/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-aggs-init-java-lang"] -=== Aggs Init API for package java.lang -See the <> for a high-level overview of all packages and classes. 
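Editor's note: several entries in the String listing above (splitOnToken, encodeBase64, decodeBase64, and the Pattern/Function overloads of replaceAll and replaceFirst) are Painless augmentations rather than JDK methods, which is why they carry no Javadoc link. A minimal sketch of how they read in a script, purely for illustration: the `csv` variable is invented here, and the regex literal assumes regexes are permitted by the cluster's Painless settings.

[source,painless]
----
// Illustrative only; 'csv' is a hypothetical local variable, not part of the generated listing.
String csv = 'alpha,beta,gamma';
String[] parts = csv.splitOnToken(',');                  // Painless-only augmentation: ['alpha', 'beta', 'gamma']
String masked = csv.replaceAll(/[aeiou]/, m -> '*');     // the Pattern + Function overload listed above
String encoded = parts[0].encodeBase64();                // 'YWxwaGE='
return encoded.decodeBase64();                           // round-trips back to 'alpha'
----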
- -[[painless-api-reference-aggs-init-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-aggs-map/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-aggs-map/index.asciidoc deleted file mode 100644 index 2a92287889fff..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-aggs-map/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-aggs-map]] -=== Aggs Map API - -The following specialized API is available in the Aggs Map context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-aggs-map/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-aggs-map/packages.asciidoc deleted file mode 100644 index 1cbdc72c725cd..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-aggs-map/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-aggs-map-java-lang"] -=== Aggs Map API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-aggs-map-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-aggs-reduce/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-aggs-reduce/index.asciidoc deleted file mode 100644 index ada5a7ffb2f6f..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-aggs-reduce/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-aggs-reduce]] -=== Aggs Reduce API - -The following specialized API is available in the Aggs Reduce context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-aggs-reduce/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-aggs-reduce/packages.asciidoc deleted file mode 100644 index 4ef0fd1133483..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-aggs-reduce/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-aggs-reduce-java-lang"] -=== Aggs Reduce API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-aggs-reduce-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-aggs/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-aggs/index.asciidoc deleted file mode 100644 index 9cfe9531fab4f..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-aggs/index.asciidoc +++ /dev/null @@ -1,31 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-aggs]] -=== Aggs API - -The following specialized API is available in the Aggs context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -==== org.elasticsearch.xpack.sql.expression.literal.geo -<> - -* <> - -==== org.elasticsearch.xpack.sql.expression.literal.interval -<> - -* <> -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-aggs/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-aggs/packages.asciidoc deleted file mode 100644 index 3213ce2dbf66f..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-aggs/packages.asciidoc +++ /dev/null @@ -1,91 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-aggs-java-lang"] -=== Aggs API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-aggs-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - -[role="exclude",id="painless-api-reference-aggs-org-elasticsearch-xpack-sql-expression-literal-geo"] -=== Aggs API for package org.elasticsearch.xpack.sql.expression.literal.geo -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-aggs-GeoShape]] -==== GeoShape -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-aggs-org-elasticsearch-xpack-sql-expression-literal-interval"] -=== Aggs API for package org.elasticsearch.xpack.sql.expression.literal.interval -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-aggs-IntervalDayTime]] -==== IntervalDayTime -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-aggs-IntervalYearMonth]] -==== IntervalYearMonth -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-analysis/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-analysis/index.asciidoc deleted file mode 100644 index 491bc49ae06a0..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-analysis/index.asciidoc +++ /dev/null @@ -1,25 +0,0 @@ -// This file is auto-generated. Do not edit. 
- -[[painless-api-reference-analysis]] -=== Analysis API - -The following specialized API is available in the Analysis context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -==== org.elasticsearch.analysis.common -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-analysis/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-analysis/packages.asciidoc deleted file mode 100644 index b5052084ee029..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-analysis/packages.asciidoc +++ /dev/null @@ -1,81 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-analysis-java-lang"] -=== Analysis API for package java.lang -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-analysis-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void 
{java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - -[role="exclude",id="painless-api-reference-analysis-org-elasticsearch-analysis-common"] -=== Analysis API for package org.elasticsearch.analysis.common -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-analysis-AnalysisPredicateScript-Token]] -==== AnalysisPredicateScript.Token -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int getEndOffset() -* int getPosition() -* int getPositionIncrement() -* int getPositionLength() -* int getStartOffset() -* CharSequence getTerm() -* String getType() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean isKeyword() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-bucket-aggregation/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-bucket-aggregation/index.asciidoc deleted file mode 100644 index f63ba71423c8c..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-bucket-aggregation/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-bucket-aggregation]] -=== Bucket Aggregation API - -The following specialized API is available in the Bucket Aggregation context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-bucket-aggregation/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-bucket-aggregation/packages.asciidoc deleted file mode 100644 index e77ea8fb47306..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-bucket-aggregation/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-bucket-aggregation-java-lang"] -=== Bucket Aggregation API for package java.lang -See the <> for a high-level overview of all packages and classes. 
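Editor's note: the AnalysisPredicateScript.Token getters listed above under the Analysis context are what a token-filter predicate script evaluates against. A hedged one-line sketch of such a predicate follows; the `token` variable is the one supplied by that context, and the length threshold is an arbitrary example value.

[source,painless]
----
// Illustrative predicate: keep only non-keyword tokens longer than three characters.
token.isKeyword() == false && token.getTerm().length() > 3
----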
- -[[painless-api-reference-bucket-aggregation-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-field/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-field/index.asciidoc deleted file mode 100644 index ac04e923a8994..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-field/index.asciidoc +++ /dev/null @@ -1,37 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-field]] -=== Field API - -The following specialized API is available in the Field context. - -* See the <> for further API available in all contexts. - -==== Static Methods -The following methods are directly callable without a class/instance qualifier. Note parameters denoted by a (*) are treated as read-only values. - -* List domainSplit(String) -* List domainSplit(String, Map) - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -==== org.elasticsearch.xpack.sql.expression.literal.geo -<> - -* <> - -==== org.elasticsearch.xpack.sql.expression.literal.interval -<> - -* <> -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-field/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-field/packages.asciidoc deleted file mode 100644 index bc6b6601924d6..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-field/packages.asciidoc +++ /dev/null @@ -1,91 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-field-java-lang"] -=== Field API for package java.lang -See the <> for a high-level overview of all packages and classes. 
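Editor's note: the Field context above also lists static helpers (domainSplit). As a hedged illustration of the shape of a call, not taken from the deleted files: the hostname literal and the use of the return value are invented, and the exact split depends on public-suffix handling.

[source,painless]
----
// domainSplit is documented to return a two-element List: [subdomain, highest registered domain].
def parts = domainSplit('www.example.co.uk');
return parts[1];   // expected to be 'example.co.uk'
----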
- -[[painless-api-reference-field-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - -[role="exclude",id="painless-api-reference-field-org-elasticsearch-xpack-sql-expression-literal-geo"] -=== Field API for package org.elasticsearch.xpack.sql.expression.literal.geo -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-field-GeoShape]] -==== GeoShape -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-field-org-elasticsearch-xpack-sql-expression-literal-interval"] -=== Field API for package org.elasticsearch.xpack.sql.expression.literal.interval -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-field-IntervalDayTime]] -==== IntervalDayTime -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-field-IntervalYearMonth]] -==== IntervalYearMonth -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-filter/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-filter/index.asciidoc deleted file mode 100644 index 0b69b3b150169..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-filter/index.asciidoc +++ /dev/null @@ -1,31 +0,0 @@ -// This file is auto-generated. Do not edit. 
- -[[painless-api-reference-filter]] -=== Filter API - -The following specialized API is available in the Filter context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -==== org.elasticsearch.xpack.sql.expression.literal.geo -<> - -* <> - -==== org.elasticsearch.xpack.sql.expression.literal.interval -<> - -* <> -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-filter/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-filter/packages.asciidoc deleted file mode 100644 index 462566d1fb6a3..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-filter/packages.asciidoc +++ /dev/null @@ -1,91 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-filter-java-lang"] -=== Filter API for package java.lang -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-filter-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - -[role="exclude",id="painless-api-reference-filter-org-elasticsearch-xpack-sql-expression-literal-geo"] -=== Filter API for package org.elasticsearch.xpack.sql.expression.literal.geo -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-filter-GeoShape]] -==== GeoShape -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-filter-org-elasticsearch-xpack-sql-expression-literal-interval"] -=== Filter API for package org.elasticsearch.xpack.sql.expression.literal.interval -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-filter-IntervalDayTime]] -==== IntervalDayTime -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-filter-IntervalYearMonth]] -==== IntervalYearMonth -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-ingest/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-ingest/index.asciidoc deleted file mode 100644 index 7b53fe84af608..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-ingest/index.asciidoc +++ /dev/null @@ -1,25 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-ingest]] -=== Ingest API - -The following specialized API is available in the Ingest context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -==== org.elasticsearch.ingest.common -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-ingest/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-ingest/packages.asciidoc deleted file mode 100644 index dddb566afcafc..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-ingest/packages.asciidoc +++ /dev/null @@ -1,81 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-ingest-java-lang"] -=== Ingest API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-ingest-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String sha1() -* String sha256() -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - -[role="exclude",id="painless-api-reference-ingest-org-elasticsearch-ingest-common"] -=== Ingest API for package org.elasticsearch.ingest.common -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-ingest-Processors]] -==== Processors -* static long bytes(String) -* static Object json(Object) -* static void json(Map, String) -* static String lowercase(String) -* static String uppercase(String) -* static String urlDecode(String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-interval/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-interval/index.asciidoc deleted file mode 100644 index a967992d78602..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-interval/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-interval]] -=== Interval API - -The following specialized API is available in the Interval context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. 
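For context on what the deletion above drops: the Ingest-context `Processors` helpers listed in the removed reference (`bytes`, `json`, `lowercase`, `uppercase`, `urlDecode`) are the static methods typically called from an ingest script processor. A minimal sketch of such a script body, assuming hypothetical `name`, `size`, and `payload` fields that are not part of this diff:

[source,painless]
----
// Sketch of an ingest script processor body; ctx is the document being indexed.
// Field names (name, size, payload) are assumptions for illustration only.
ctx.name_lower = Processors.lowercase(ctx.name);  // "Foo Bar" -> "foo bar"
ctx.size_bytes = Processors.bytes(ctx.size);      // "1kb" -> 1024
ctx.payload = Processors.json(ctx.payload);       // parse a JSON string into an object
----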
- - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-interval/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-interval/packages.asciidoc deleted file mode 100644 index c33acf7a2570b..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-interval/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-interval-java-lang"] -=== Interval API for package java.lang -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-interval-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-moving-function/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-moving-function/index.asciidoc deleted file mode 100644 index ee7ebe2ed263a..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-moving-function/index.asciidoc +++ /dev/null @@ -1,25 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-moving-function]] -=== Moving Function API - -The following specialized API is available in the Moving Function context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. 
- - -==== java.lang -<> - -* <> - -==== org.elasticsearch.search.aggregations.pipeline -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-moving-function/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-moving-function/packages.asciidoc deleted file mode 100644 index a30c9fc6935a2..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-moving-function/packages.asciidoc +++ /dev/null @@ -1,82 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-moving-function-java-lang"] -=== Moving Function API for package java.lang -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-moving-function-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int 
{java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - -[role="exclude",id="painless-api-reference-moving-function-org-elasticsearch-search-aggregations-pipeline"] -=== Moving Function API for package org.elasticsearch.search.aggregations.pipeline -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-moving-function-MovingFunctions]] -==== MovingFunctions -* static double ewma(double[], double) -* static double holt(double[], double, double) -* static double holtWinters(double[], double, double, double, int, boolean) -* static double linearWeightedAvg(double[]) -* static double max(double[]) -* static double min(double[]) -* static double stdDev(double[], double) -* static double sum(double[]) -* static double unweightedAvg(double[]) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-number-sort/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-number-sort/index.asciidoc deleted file mode 100644 index a91a6af5f8418..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-number-sort/index.asciidoc +++ /dev/null @@ -1,31 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-number-sort]] -=== Number Sort API - -The following specialized API is available in the Number Sort context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -==== org.elasticsearch.xpack.sql.expression.literal.geo -<> - -* <> - -==== org.elasticsearch.xpack.sql.expression.literal.interval -<> - -* <> -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-number-sort/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-number-sort/packages.asciidoc deleted file mode 100644 index 20157333a088c..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-number-sort/packages.asciidoc +++ /dev/null @@ -1,91 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-number-sort-java-lang"] -=== Number Sort API for package java.lang -See the <> for a high-level overview of all packages and classes. 
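The `MovingFunctions` entries removed just above belong to the Moving Function context, where a `moving_fn` pipeline aggregation hands the script a window of bucket values in `values`. A minimal sketch of such a script body (the surrounding aggregation definition is not part of this diff):

[source,painless]
----
// Body of a moving_fn pipeline aggregation script; `values` is the double[]
// window supplied by the aggregation. MovingFunctions.ewma(values, 0.3) would
// weight recent buckets more heavily instead of a plain average.
return MovingFunctions.unweightedAvg(values);
----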
- -[[painless-api-reference-number-sort-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - -[role="exclude",id="painless-api-reference-number-sort-org-elasticsearch-xpack-sql-expression-literal-geo"] -=== Number Sort API for package org.elasticsearch.xpack.sql.expression.literal.geo -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-number-sort-GeoShape]] -==== GeoShape -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-number-sort-org-elasticsearch-xpack-sql-expression-literal-interval"] -=== Number Sort API for package org.elasticsearch.xpack.sql.expression.literal.interval -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-number-sort-IntervalDayTime]] -==== IntervalDayTime -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-number-sort-IntervalYearMonth]] -==== IntervalYearMonth -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-painless-test/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-painless-test/index.asciidoc deleted file mode 100644 index 5ceee2904beff..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-painless-test/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-painless-test]] -=== Painless Test API - -The following specialized API is available in the Painless Test context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-painless-test/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-painless-test/packages.asciidoc deleted file mode 100644 index ee58588b3ff93..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-painless-test/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-painless-test-java-lang"] -=== Painless Test API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-painless-test-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-processor-conditional/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-processor-conditional/index.asciidoc deleted file mode 100644 index 1d2096b0f59d1..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-processor-conditional/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-processor-conditional]] -=== Processor Conditional API - -The following specialized API is available in the Processor Conditional context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-processor-conditional/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-processor-conditional/packages.asciidoc deleted file mode 100644 index 66b5d475b757a..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-processor-conditional/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-processor-conditional-java-lang"] -=== Processor Conditional API for package java.lang -See the <> for a high-level overview of all packages and classes. 
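The Processor Conditional context whose reference is being removed here is what an ingest processor's `if` clause runs in; it only needs the shared `java.lang` API, such as the `String` methods in the listing that follows. A minimal sketch of such a condition, assuming a hypothetical `message` field:

[source,painless]
----
// Possible `if` condition on an ingest processor; evaluates to a boolean.
// The `message` field name is an assumption for illustration.
ctx.message != null && ctx.message.toLowerCase().contains('error')
----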
- -[[painless-api-reference-processor-conditional-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-score/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-score/index.asciidoc deleted file mode 100644 index 4300a1c7efc66..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-score/index.asciidoc +++ /dev/null @@ -1,47 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-score]] -=== Score API - -The following specialized API is available in the Score context. - -* See the <> for further API available in all contexts. - -==== Static Methods -The following methods are directly callable without a class/instance qualifier. Note parameters denoted by a (*) are treated as read-only values. - -* double cosineSimilarity(Object *, String *) -* double decayDateExp(String *, String *, String *, double *, ZonedDateTime) -* double decayDateGauss(String *, String *, String *, double *, ZonedDateTime) -* double decayDateLinear(String *, String *, String *, double *, ZonedDateTime) -* double decayGeoExp(String *, String *, String *, double *, GeoPoint) -* double decayGeoGauss(String *, String *, String *, double *, GeoPoint) -* double decayGeoLinear(String *, String *, String *, double *, GeoPoint) -* double decayNumericExp(double *, double *, double *, double *, double) -* double decayNumericGauss(double *, double *, double *, double *, double) -* double decayNumericLinear(double *, double *, double *, double *, double) -* double dotProduct(Object *, String *) -* double l1norm(Object *, String *) -* double l2norm(Object *, String *) -* double hamming(Object *, String *) -* double randomScore(int *) -* double randomScore(int *, String *) -* double saturation(double, double) -* double sigmoid(double, double, double) - -==== Classes By Package -The following classes are available grouped by their respective packages. 
Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -==== org.elasticsearch.xpack.vectors.query -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-score/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-score/packages.asciidoc deleted file mode 100644 index 930834cfac2b2..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-score/packages.asciidoc +++ /dev/null @@ -1,122 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-score-java-lang"] -=== Score API for package java.lang -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-score-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int 
{java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - -[role="exclude",id="painless-api-reference-score-org-elasticsearch-xpack-vectors-query"] -=== Score API for package org.elasticsearch.xpack.vectors.query -See the <> for a high-level overview of all packages and classes. 
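The Score-context static functions listed a little further up (`cosineSimilarity`, the `decay*` family, `saturation`, `sigmoid`, `randomScore`) are used inside `script_score` queries, and the vector-related ones pair with the `DenseVectorScriptDocValues` class detailed next. A minimal sketch of a script body, assuming a hypothetical dense_vector field named `my_vector` and a caller-supplied `params.query_vector`:

[source,painless]
----
// Body of a script_score script. Adding 1.0 keeps the score non-negative,
// since cosine similarity ranges from -1 to 1.
cosineSimilarity(params.query_vector, 'my_vector') + 1.0
----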
- -[[painless-api-reference-score-DenseVectorScriptDocValues]] -==== DenseVectorScriptDocValues -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* def {java11-javadoc}/java.base/java/util/List.html#get(int)[get](int) -* Object getByPath(String) -* Object getByPath(String, Object) -* int getLength() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* String join(String) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] 
{java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-script-heuristic/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-script-heuristic/index.asciidoc deleted file mode 100644 index 8b32ad624a32a..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-script-heuristic/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-script-heuristic]] -=== Script Heuristic API - -The following specialized API is available in the Script Heuristic context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-script-heuristic/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-script-heuristic/packages.asciidoc deleted file mode 100644 index e5799e8fdd408..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-script-heuristic/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-script-heuristic-java-lang"] -=== Script Heuristic API for package java.lang -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-script-heuristic-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-shared/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-shared/index.asciidoc deleted file mode 100644 index c8bbedadf6b4e..0000000000000 --- 
a/docs/painless/painless-api-reference/painless-api-reference-shared/index.asciidoc +++ /dev/null @@ -1,435 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-shared]] -=== Shared API - -The following API is available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -==== java.math -<> - -* <> -* <> -* <> -* <> - -==== java.text -<> - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -==== java.time -<> - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -==== java.time.chrono -<> - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -==== java.time.format -<> - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -==== java.time.temporal -<> - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -==== java.time.zone -<> - -* <> -* <> -* <> -* <> -* <> -* <> - -==== java.util -<> - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -==== java.util.function -<> - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -==== java.util.regex -<> - -* <> -* <> - -==== java.util.stream -<> - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -==== org.apache.lucene.util -<> - -* <> - -==== org.elasticsearch.common.geo -<> - -* <> - -==== org.elasticsearch.index.fielddata -<> - -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -==== org.elasticsearch.index.mapper -<> - -* <> - -==== org.elasticsearch.index.query -<> - -* <> - -==== org.elasticsearch.index.similarity -<> - -* <> -* <> -* <> -* <> - -==== org.elasticsearch.painless.api -<> - -* <> - -==== org.elasticsearch.script -<> - -* <> - -==== org.elasticsearch.search.lookup -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-shared/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-shared/packages.asciidoc deleted file mode 100644 index 584d7ade9ec7c..0000000000000 --- 
a/docs/painless/painless-api-reference/painless-api-reference-shared/packages.asciidoc +++ /dev/null @@ -1,8616 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-shared-java-lang"] -=== Shared API for package java.lang -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-shared-Appendable]] -==== Appendable -* Appendable {java11-javadoc}/java.base/java/lang/Appendable.html#append(java.lang.CharSequence,int,int)[append](CharSequence, int, int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ArithmeticException]] -==== ArithmeticException -* {java11-javadoc}/java.base/java/lang/ArithmeticException.html#()[ArithmeticException]() -* {java11-javadoc}/java.base/java/lang/ArithmeticException.html#(java.lang.String)[ArithmeticException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ArrayIndexOutOfBoundsException]] -==== ArrayIndexOutOfBoundsException -* {java11-javadoc}/java.base/java/lang/ArrayIndexOutOfBoundsException.html#()[ArrayIndexOutOfBoundsException]() -* {java11-javadoc}/java.base/java/lang/ArrayIndexOutOfBoundsException.html#(java.lang.String)[ArrayIndexOutOfBoundsException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ArrayStoreException]] -==== ArrayStoreException -* {java11-javadoc}/java.base/java/lang/ArrayStoreException.html#()[ArrayStoreException]() -* {java11-javadoc}/java.base/java/lang/ArrayStoreException.html#(java.lang.String)[ArrayStoreException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Boolean]] -==== Boolean -* static Boolean {java11-javadoc}/java.base/java/lang/Boolean.html#FALSE[FALSE] -* 
static Boolean {java11-javadoc}/java.base/java/lang/Boolean.html#TRUE[TRUE] -* static int {java11-javadoc}/java.base/java/lang/Boolean.html#compare(boolean,boolean)[compare](boolean, boolean) -* static int {java11-javadoc}/java.base/java/lang/Boolean.html#hashCode(boolean)[hashCode](boolean) -* static boolean {java11-javadoc}/java.base/java/lang/Boolean.html#logicalAnd(boolean,boolean)[logicalAnd](boolean, boolean) -* static boolean {java11-javadoc}/java.base/java/lang/Boolean.html#logicalOr(boolean,boolean)[logicalOr](boolean, boolean) -* static boolean {java11-javadoc}/java.base/java/lang/Boolean.html#logicalXor(boolean,boolean)[logicalXor](boolean, boolean) -* static boolean {java11-javadoc}/java.base/java/lang/Boolean.html#parseBoolean(java.lang.String)[parseBoolean](null) -* static null {java11-javadoc}/java.base/java/lang/Boolean.html#toString(boolean)[toString](boolean) -* static Boolean {java11-javadoc}/java.base/java/lang/Boolean.html#valueOf(boolean)[valueOf](boolean) -* boolean {java11-javadoc}/java.base/java/lang/Boolean.html#booleanValue()[booleanValue]() -* int {java11-javadoc}/java.base/java/lang/Boolean.html#compareTo(java.lang.Boolean)[compareTo](Boolean) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Byte]] -==== Byte -* static int {java11-javadoc}/java.base/java/lang/Byte.html#BYTES[BYTES] -* static byte {java11-javadoc}/java.base/java/lang/Byte.html#MAX_VALUE[MAX_VALUE] -* static byte {java11-javadoc}/java.base/java/lang/Byte.html#MIN_VALUE[MIN_VALUE] -* static int {java11-javadoc}/java.base/java/lang/Byte.html#SIZE[SIZE] -* static int {java11-javadoc}/java.base/java/lang/Byte.html#compare(byte,byte)[compare](byte, byte) -* static Byte {java11-javadoc}/java.base/java/lang/Byte.html#decode(java.lang.String)[decode](null) -* static int {java11-javadoc}/java.base/java/lang/Byte.html#hashCode(byte)[hashCode](byte) -* static byte {java11-javadoc}/java.base/java/lang/Byte.html#parseByte(java.lang.String)[parseByte](null) -* static byte {java11-javadoc}/java.base/java/lang/Byte.html#parseByte(java.lang.String,int)[parseByte](null, int) -* static null {java11-javadoc}/java.base/java/lang/Byte.html#toString(byte)[toString](byte) -* static int {java11-javadoc}/java.base/java/lang/Byte.html#toUnsignedInt(byte)[toUnsignedInt](byte) -* static long {java11-javadoc}/java.base/java/lang/Byte.html#toUnsignedLong(byte)[toUnsignedLong](byte) -* static Byte {java11-javadoc}/java.base/java/lang/Byte.html#valueOf(byte)[valueOf](byte) -* static Byte {java11-javadoc}/java.base/java/lang/Byte.html#valueOf(java.lang.String,int)[valueOf](null, int) -* byte {java11-javadoc}/java.base/java/lang/Number.html#byteValue()[byteValue]() -* int {java11-javadoc}/java.base/java/lang/Byte.html#compareTo(java.lang.Byte)[compareTo](Byte) -* double {java11-javadoc}/java.base/java/lang/Number.html#doubleValue()[doubleValue]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* float {java11-javadoc}/java.base/java/lang/Number.html#floatValue()[floatValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/Number.html#intValue()[intValue]() -* long {java11-javadoc}/java.base/java/lang/Number.html#longValue()[longValue]() -* short 
{java11-javadoc}/java.base/java/lang/Number.html#shortValue()[shortValue]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-CharSequence]] -==== CharSequence -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* null replaceAll(Pattern, Function) -* null replaceFirst(Pattern, Function) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* null {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() - - -[[painless-api-reference-shared-Character]] -==== Character -* static int {java11-javadoc}/java.base/java/lang/Character.html#BYTES[BYTES] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#COMBINING_SPACING_MARK[COMBINING_SPACING_MARK] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#CONNECTOR_PUNCTUATION[CONNECTOR_PUNCTUATION] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#CONTROL[CONTROL] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#CURRENCY_SYMBOL[CURRENCY_SYMBOL] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DASH_PUNCTUATION[DASH_PUNCTUATION] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DECIMAL_DIGIT_NUMBER[DECIMAL_DIGIT_NUMBER] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_ARABIC_NUMBER[DIRECTIONALITY_ARABIC_NUMBER] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_BOUNDARY_NEUTRAL[DIRECTIONALITY_BOUNDARY_NEUTRAL] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR[DIRECTIONALITY_COMMON_NUMBER_SEPARATOR] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_EUROPEAN_NUMBER[DIRECTIONALITY_EUROPEAN_NUMBER] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR[DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR[DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_LEFT_TO_RIGHT[DIRECTIONALITY_LEFT_TO_RIGHT] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING[DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE[DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_NONSPACING_MARK[DIRECTIONALITY_NONSPACING_MARK] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_OTHER_NEUTRALS[DIRECTIONALITY_OTHER_NEUTRALS] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_PARAGRAPH_SEPARATOR[DIRECTIONALITY_PARAGRAPH_SEPARATOR] -* static byte 
{java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT[DIRECTIONALITY_POP_DIRECTIONAL_FORMAT] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_RIGHT_TO_LEFT[DIRECTIONALITY_RIGHT_TO_LEFT] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC[DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING[DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE[DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_SEGMENT_SEPARATOR[DIRECTIONALITY_SEGMENT_SEPARATOR] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_UNDEFINED[DIRECTIONALITY_UNDEFINED] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#DIRECTIONALITY_WHITESPACE[DIRECTIONALITY_WHITESPACE] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#ENCLOSING_MARK[ENCLOSING_MARK] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#END_PUNCTUATION[END_PUNCTUATION] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#FINAL_QUOTE_PUNCTUATION[FINAL_QUOTE_PUNCTUATION] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#FORMAT[FORMAT] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#INITIAL_QUOTE_PUNCTUATION[INITIAL_QUOTE_PUNCTUATION] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#LETTER_NUMBER[LETTER_NUMBER] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#LINE_SEPARATOR[LINE_SEPARATOR] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#LOWERCASE_LETTER[LOWERCASE_LETTER] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#MATH_SYMBOL[MATH_SYMBOL] -* static int {java11-javadoc}/java.base/java/lang/Character.html#MAX_CODE_POINT[MAX_CODE_POINT] -* static char {java11-javadoc}/java.base/java/lang/Character.html#MAX_HIGH_SURROGATE[MAX_HIGH_SURROGATE] -* static char {java11-javadoc}/java.base/java/lang/Character.html#MAX_LOW_SURROGATE[MAX_LOW_SURROGATE] -* static int {java11-javadoc}/java.base/java/lang/Character.html#MAX_RADIX[MAX_RADIX] -* static char {java11-javadoc}/java.base/java/lang/Character.html#MAX_SURROGATE[MAX_SURROGATE] -* static char {java11-javadoc}/java.base/java/lang/Character.html#MAX_VALUE[MAX_VALUE] -* static int {java11-javadoc}/java.base/java/lang/Character.html#MIN_CODE_POINT[MIN_CODE_POINT] -* static char {java11-javadoc}/java.base/java/lang/Character.html#MIN_HIGH_SURROGATE[MIN_HIGH_SURROGATE] -* static char {java11-javadoc}/java.base/java/lang/Character.html#MIN_LOW_SURROGATE[MIN_LOW_SURROGATE] -* static int {java11-javadoc}/java.base/java/lang/Character.html#MIN_RADIX[MIN_RADIX] -* static int {java11-javadoc}/java.base/java/lang/Character.html#MIN_SUPPLEMENTARY_CODE_POINT[MIN_SUPPLEMENTARY_CODE_POINT] -* static char {java11-javadoc}/java.base/java/lang/Character.html#MIN_SURROGATE[MIN_SURROGATE] -* static char {java11-javadoc}/java.base/java/lang/Character.html#MIN_VALUE[MIN_VALUE] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#MODIFIER_LETTER[MODIFIER_LETTER] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#MODIFIER_SYMBOL[MODIFIER_SYMBOL] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#NON_SPACING_MARK[NON_SPACING_MARK] -* static 
byte {java11-javadoc}/java.base/java/lang/Character.html#OTHER_LETTER[OTHER_LETTER] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#OTHER_NUMBER[OTHER_NUMBER] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#OTHER_PUNCTUATION[OTHER_PUNCTUATION] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#OTHER_SYMBOL[OTHER_SYMBOL] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#PARAGRAPH_SEPARATOR[PARAGRAPH_SEPARATOR] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#PRIVATE_USE[PRIVATE_USE] -* static int {java11-javadoc}/java.base/java/lang/Character.html#SIZE[SIZE] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#SPACE_SEPARATOR[SPACE_SEPARATOR] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#START_PUNCTUATION[START_PUNCTUATION] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#SURROGATE[SURROGATE] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#TITLECASE_LETTER[TITLECASE_LETTER] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#UNASSIGNED[UNASSIGNED] -* static byte {java11-javadoc}/java.base/java/lang/Character.html#UPPERCASE_LETTER[UPPERCASE_LETTER] -* static int {java11-javadoc}/java.base/java/lang/Character.html#charCount(int)[charCount](int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#codePointAt(java.lang.CharSequence,int)[codePointAt](CharSequence, int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#codePointAt(char%5B%5D,int,int)[codePointAt](char[], int, int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#codePointBefore(java.lang.CharSequence,int)[codePointBefore](CharSequence, int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#codePointBefore(char%5B%5D,int,int)[codePointBefore](char[], int, int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#codePointCount(java.lang.CharSequence,int,int)[codePointCount](CharSequence, int, int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#compare(char,char)[compare](char, char) -* static int {java11-javadoc}/java.base/java/lang/Character.html#digit(int,int)[digit](int, int) -* static char {java11-javadoc}/java.base/java/lang/Character.html#forDigit(int,int)[forDigit](int, int) -* static byte {java11-javadoc}/java.base/java/lang/Character.html#getDirectionality(int)[getDirectionality](int) -* static null {java11-javadoc}/java.base/java/lang/Character.html#getName(int)[getName](int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#getNumericValue(int)[getNumericValue](int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#getType(int)[getType](int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#hashCode(char)[hashCode](char) -* static char {java11-javadoc}/java.base/java/lang/Character.html#highSurrogate(int)[highSurrogate](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isAlphabetic(int)[isAlphabetic](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isBmpCodePoint(int)[isBmpCodePoint](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isDefined(int)[isDefined](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isDigit(int)[isDigit](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isHighSurrogate(char)[isHighSurrogate](char) -* static boolean 
{java11-javadoc}/java.base/java/lang/Character.html#isISOControl(int)[isISOControl](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isIdentifierIgnorable(int)[isIdentifierIgnorable](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isIdeographic(int)[isIdeographic](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isJavaIdentifierPart(int)[isJavaIdentifierPart](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isJavaIdentifierStart(int)[isJavaIdentifierStart](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isLetter(int)[isLetter](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isLetterOrDigit(int)[isLetterOrDigit](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isLowerCase(int)[isLowerCase](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isMirrored(int)[isMirrored](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isSpaceChar(int)[isSpaceChar](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isSupplementaryCodePoint(int)[isSupplementaryCodePoint](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isSurrogate(char)[isSurrogate](char) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isSurrogatePair(char,char)[isSurrogatePair](char, char) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isTitleCase(int)[isTitleCase](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isUnicodeIdentifierPart(int)[isUnicodeIdentifierPart](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isUnicodeIdentifierStart(int)[isUnicodeIdentifierStart](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isUpperCase(int)[isUpperCase](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isValidCodePoint(int)[isValidCodePoint](int) -* static boolean {java11-javadoc}/java.base/java/lang/Character.html#isWhitespace(int)[isWhitespace](int) -* static char {java11-javadoc}/java.base/java/lang/Character.html#lowSurrogate(int)[lowSurrogate](int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#offsetByCodePoints(java.lang.CharSequence,int,int)[offsetByCodePoints](CharSequence, int, int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#offsetByCodePoints(char%5B%5D,int,int,int,int)[offsetByCodePoints](char[], int, int, int, int) -* static char {java11-javadoc}/java.base/java/lang/Character.html#reverseBytes(char)[reverseBytes](char) -* static char[] {java11-javadoc}/java.base/java/lang/Character.html#toChars(int)[toChars](int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#toChars(int,char%5B%5D,int)[toChars](int, char[], int) -* static int {java11-javadoc}/java.base/java/lang/Character.html#toCodePoint(char,char)[toCodePoint](char, char) -* static char {java11-javadoc}/java.base/java/lang/Character.html#toLowerCase(char)[toLowerCase](char) -* static null {java11-javadoc}/java.base/java/lang/Character.html#toString(char)[toString](char) -* static char {java11-javadoc}/java.base/java/lang/Character.html#toTitleCase(char)[toTitleCase](char) -* static char {java11-javadoc}/java.base/java/lang/Character.html#toUpperCase(char)[toUpperCase](char) -* static Character {java11-javadoc}/java.base/java/lang/Character.html#valueOf(char)[valueOf](char) -* char 
{java11-javadoc}/java.base/java/lang/Character.html#charValue()[charValue]() -* int {java11-javadoc}/java.base/java/lang/Character.html#compareTo(java.lang.Character)[compareTo](Character) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Character-Subset]] -==== Character.Subset -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Character-UnicodeBlock]] -==== Character.UnicodeBlock -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#AEGEAN_NUMBERS[AEGEAN_NUMBERS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ALCHEMICAL_SYMBOLS[ALCHEMICAL_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ALPHABETIC_PRESENTATION_FORMS[ALPHABETIC_PRESENTATION_FORMS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ANCIENT_GREEK_MUSICAL_NOTATION[ANCIENT_GREEK_MUSICAL_NOTATION] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ANCIENT_GREEK_NUMBERS[ANCIENT_GREEK_NUMBERS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ANCIENT_SYMBOLS[ANCIENT_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ARABIC[ARABIC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ARABIC_EXTENDED_A[ARABIC_EXTENDED_A] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS[ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ARABIC_PRESENTATION_FORMS_A[ARABIC_PRESENTATION_FORMS_A] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ARABIC_PRESENTATION_FORMS_B[ARABIC_PRESENTATION_FORMS_B] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ARABIC_SUPPLEMENT[ARABIC_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ARMENIAN[ARMENIAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ARROWS[ARROWS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#AVESTAN[AVESTAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BALINESE[BALINESE] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BAMUM[BAMUM] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BAMUM_SUPPLEMENT[BAMUM_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BASIC_LATIN[BASIC_LATIN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BATAK[BATAK] -* 
static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BENGALI[BENGALI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BLOCK_ELEMENTS[BLOCK_ELEMENTS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BOPOMOFO[BOPOMOFO] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BOPOMOFO_EXTENDED[BOPOMOFO_EXTENDED] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BOX_DRAWING[BOX_DRAWING] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BRAHMI[BRAHMI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BRAILLE_PATTERNS[BRAILLE_PATTERNS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BUGINESE[BUGINESE] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BUHID[BUHID] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#BYZANTINE_MUSICAL_SYMBOLS[BYZANTINE_MUSICAL_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CARIAN[CARIAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CHAKMA[CHAKMA] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CHAM[CHAM] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CHEROKEE[CHEROKEE] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CJK_COMPATIBILITY[CJK_COMPATIBILITY] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CJK_COMPATIBILITY_FORMS[CJK_COMPATIBILITY_FORMS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CJK_COMPATIBILITY_IDEOGRAPHS[CJK_COMPATIBILITY_IDEOGRAPHS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT[CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CJK_RADICALS_SUPPLEMENT[CJK_RADICALS_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CJK_STROKES[CJK_STROKES] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CJK_SYMBOLS_AND_PUNCTUATION[CJK_SYMBOLS_AND_PUNCTUATION] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CJK_UNIFIED_IDEOGRAPHS[CJK_UNIFIED_IDEOGRAPHS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A[CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B[CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C[CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C] -* static Character.UnicodeBlock 
{java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D[CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#COMBINING_DIACRITICAL_MARKS[COMBINING_DIACRITICAL_MARKS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#COMBINING_DIACRITICAL_MARKS_SUPPLEMENT[COMBINING_DIACRITICAL_MARKS_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#COMBINING_HALF_MARKS[COMBINING_HALF_MARKS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#COMBINING_MARKS_FOR_SYMBOLS[COMBINING_MARKS_FOR_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#COMMON_INDIC_NUMBER_FORMS[COMMON_INDIC_NUMBER_FORMS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CONTROL_PICTURES[CONTROL_PICTURES] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#COPTIC[COPTIC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#COUNTING_ROD_NUMERALS[COUNTING_ROD_NUMERALS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CUNEIFORM[CUNEIFORM] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CUNEIFORM_NUMBERS_AND_PUNCTUATION[CUNEIFORM_NUMBERS_AND_PUNCTUATION] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CURRENCY_SYMBOLS[CURRENCY_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CYPRIOT_SYLLABARY[CYPRIOT_SYLLABARY] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CYRILLIC[CYRILLIC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CYRILLIC_EXTENDED_A[CYRILLIC_EXTENDED_A] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CYRILLIC_EXTENDED_B[CYRILLIC_EXTENDED_B] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#CYRILLIC_SUPPLEMENTARY[CYRILLIC_SUPPLEMENTARY] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#DESERET[DESERET] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#DEVANAGARI[DEVANAGARI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#DEVANAGARI_EXTENDED[DEVANAGARI_EXTENDED] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#DINGBATS[DINGBATS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#DOMINO_TILES[DOMINO_TILES] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#EGYPTIAN_HIEROGLYPHS[EGYPTIAN_HIEROGLYPHS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#EMOTICONS[EMOTICONS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ENCLOSED_ALPHANUMERICS[ENCLOSED_ALPHANUMERICS] -* static Character.UnicodeBlock 
{java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ENCLOSED_ALPHANUMERIC_SUPPLEMENT[ENCLOSED_ALPHANUMERIC_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ENCLOSED_CJK_LETTERS_AND_MONTHS[ENCLOSED_CJK_LETTERS_AND_MONTHS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ENCLOSED_IDEOGRAPHIC_SUPPLEMENT[ENCLOSED_IDEOGRAPHIC_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ETHIOPIC[ETHIOPIC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ETHIOPIC_EXTENDED[ETHIOPIC_EXTENDED] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ETHIOPIC_EXTENDED_A[ETHIOPIC_EXTENDED_A] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ETHIOPIC_SUPPLEMENT[ETHIOPIC_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#GENERAL_PUNCTUATION[GENERAL_PUNCTUATION] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#GEOMETRIC_SHAPES[GEOMETRIC_SHAPES] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#GEORGIAN[GEORGIAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#GEORGIAN_SUPPLEMENT[GEORGIAN_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#GLAGOLITIC[GLAGOLITIC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#GOTHIC[GOTHIC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#GREEK[GREEK] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#GREEK_EXTENDED[GREEK_EXTENDED] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#GUJARATI[GUJARATI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#GURMUKHI[GURMUKHI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#HALFWIDTH_AND_FULLWIDTH_FORMS[HALFWIDTH_AND_FULLWIDTH_FORMS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#HANGUL_COMPATIBILITY_JAMO[HANGUL_COMPATIBILITY_JAMO] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#HANGUL_JAMO[HANGUL_JAMO] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#HANGUL_JAMO_EXTENDED_A[HANGUL_JAMO_EXTENDED_A] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#HANGUL_JAMO_EXTENDED_B[HANGUL_JAMO_EXTENDED_B] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#HANGUL_SYLLABLES[HANGUL_SYLLABLES] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#HANUNOO[HANUNOO] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#HEBREW[HEBREW] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#HIGH_PRIVATE_USE_SURROGATES[HIGH_PRIVATE_USE_SURROGATES] -* static Character.UnicodeBlock 
{java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#HIGH_SURROGATES[HIGH_SURROGATES] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#HIRAGANA[HIRAGANA] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#IDEOGRAPHIC_DESCRIPTION_CHARACTERS[IDEOGRAPHIC_DESCRIPTION_CHARACTERS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#IMPERIAL_ARAMAIC[IMPERIAL_ARAMAIC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#INSCRIPTIONAL_PAHLAVI[INSCRIPTIONAL_PAHLAVI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#INSCRIPTIONAL_PARTHIAN[INSCRIPTIONAL_PARTHIAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#IPA_EXTENSIONS[IPA_EXTENSIONS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#JAVANESE[JAVANESE] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#KAITHI[KAITHI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#KANA_SUPPLEMENT[KANA_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#KANBUN[KANBUN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#KANGXI_RADICALS[KANGXI_RADICALS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#KANNADA[KANNADA] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#KATAKANA[KATAKANA] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#KATAKANA_PHONETIC_EXTENSIONS[KATAKANA_PHONETIC_EXTENSIONS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#KAYAH_LI[KAYAH_LI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#KHAROSHTHI[KHAROSHTHI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#KHMER[KHMER] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#KHMER_SYMBOLS[KHMER_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LAO[LAO] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LATIN_1_SUPPLEMENT[LATIN_1_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LATIN_EXTENDED_A[LATIN_EXTENDED_A] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LATIN_EXTENDED_ADDITIONAL[LATIN_EXTENDED_ADDITIONAL] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LATIN_EXTENDED_B[LATIN_EXTENDED_B] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LATIN_EXTENDED_C[LATIN_EXTENDED_C] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LATIN_EXTENDED_D[LATIN_EXTENDED_D] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LEPCHA[LEPCHA] -* static Character.UnicodeBlock 
{java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LETTERLIKE_SYMBOLS[LETTERLIKE_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LIMBU[LIMBU] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LINEAR_B_IDEOGRAMS[LINEAR_B_IDEOGRAMS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LINEAR_B_SYLLABARY[LINEAR_B_SYLLABARY] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LISU[LISU] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LOW_SURROGATES[LOW_SURROGATES] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LYCIAN[LYCIAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#LYDIAN[LYDIAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MAHJONG_TILES[MAHJONG_TILES] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MALAYALAM[MALAYALAM] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MANDAIC[MANDAIC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MATHEMATICAL_ALPHANUMERIC_SYMBOLS[MATHEMATICAL_ALPHANUMERIC_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MATHEMATICAL_OPERATORS[MATHEMATICAL_OPERATORS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MEETEI_MAYEK[MEETEI_MAYEK] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MEETEI_MAYEK_EXTENSIONS[MEETEI_MAYEK_EXTENSIONS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MEROITIC_CURSIVE[MEROITIC_CURSIVE] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MEROITIC_HIEROGLYPHS[MEROITIC_HIEROGLYPHS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MIAO[MIAO] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A[MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B[MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MISCELLANEOUS_SYMBOLS[MISCELLANEOUS_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MISCELLANEOUS_SYMBOLS_AND_ARROWS[MISCELLANEOUS_SYMBOLS_AND_ARROWS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS[MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MISCELLANEOUS_TECHNICAL[MISCELLANEOUS_TECHNICAL] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MODIFIER_TONE_LETTERS[MODIFIER_TONE_LETTERS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MONGOLIAN[MONGOLIAN] -* static Character.UnicodeBlock 
{java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MUSICAL_SYMBOLS[MUSICAL_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MYANMAR[MYANMAR] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#MYANMAR_EXTENDED_A[MYANMAR_EXTENDED_A] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#NEW_TAI_LUE[NEW_TAI_LUE] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#NKO[NKO] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#NUMBER_FORMS[NUMBER_FORMS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#OGHAM[OGHAM] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#OLD_ITALIC[OLD_ITALIC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#OLD_PERSIAN[OLD_PERSIAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#OLD_SOUTH_ARABIAN[OLD_SOUTH_ARABIAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#OLD_TURKIC[OLD_TURKIC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#OL_CHIKI[OL_CHIKI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#OPTICAL_CHARACTER_RECOGNITION[OPTICAL_CHARACTER_RECOGNITION] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#ORIYA[ORIYA] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#OSMANYA[OSMANYA] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#PHAGS_PA[PHAGS_PA] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#PHAISTOS_DISC[PHAISTOS_DISC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#PHOENICIAN[PHOENICIAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#PHONETIC_EXTENSIONS[PHONETIC_EXTENSIONS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#PHONETIC_EXTENSIONS_SUPPLEMENT[PHONETIC_EXTENSIONS_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#PLAYING_CARDS[PLAYING_CARDS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#PRIVATE_USE_AREA[PRIVATE_USE_AREA] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#REJANG[REJANG] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#RUMI_NUMERAL_SYMBOLS[RUMI_NUMERAL_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#RUNIC[RUNIC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SAMARITAN[SAMARITAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SAURASHTRA[SAURASHTRA] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SHARADA[SHARADA] -* static Character.UnicodeBlock 
{java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SHAVIAN[SHAVIAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SINHALA[SINHALA] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SMALL_FORM_VARIANTS[SMALL_FORM_VARIANTS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SORA_SOMPENG[SORA_SOMPENG] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SPACING_MODIFIER_LETTERS[SPACING_MODIFIER_LETTERS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SPECIALS[SPECIALS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SUNDANESE[SUNDANESE] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SUNDANESE_SUPPLEMENT[SUNDANESE_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SUPERSCRIPTS_AND_SUBSCRIPTS[SUPERSCRIPTS_AND_SUBSCRIPTS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SUPPLEMENTAL_ARROWS_A[SUPPLEMENTAL_ARROWS_A] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SUPPLEMENTAL_ARROWS_B[SUPPLEMENTAL_ARROWS_B] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SUPPLEMENTAL_MATHEMATICAL_OPERATORS[SUPPLEMENTAL_MATHEMATICAL_OPERATORS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SUPPLEMENTAL_PUNCTUATION[SUPPLEMENTAL_PUNCTUATION] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SUPPLEMENTARY_PRIVATE_USE_AREA_A[SUPPLEMENTARY_PRIVATE_USE_AREA_A] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SUPPLEMENTARY_PRIVATE_USE_AREA_B[SUPPLEMENTARY_PRIVATE_USE_AREA_B] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SYLOTI_NAGRI[SYLOTI_NAGRI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#SYRIAC[SYRIAC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TAGALOG[TAGALOG] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TAGBANWA[TAGBANWA] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TAGS[TAGS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TAI_LE[TAI_LE] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TAI_THAM[TAI_THAM] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TAI_VIET[TAI_VIET] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TAI_XUAN_JING_SYMBOLS[TAI_XUAN_JING_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TAKRI[TAKRI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TAMIL[TAMIL] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TELUGU[TELUGU] -* static Character.UnicodeBlock 
{java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#THAANA[THAANA] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#THAI[THAI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TIBETAN[TIBETAN] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TIFINAGH[TIFINAGH] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#TRANSPORT_AND_MAP_SYMBOLS[TRANSPORT_AND_MAP_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#UGARITIC[UGARITIC] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS[UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED[UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#VAI[VAI] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#VARIATION_SELECTORS[VARIATION_SELECTORS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#VARIATION_SELECTORS_SUPPLEMENT[VARIATION_SELECTORS_SUPPLEMENT] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#VEDIC_EXTENSIONS[VEDIC_EXTENSIONS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#VERTICAL_FORMS[VERTICAL_FORMS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#YIJING_HEXAGRAM_SYMBOLS[YIJING_HEXAGRAM_SYMBOLS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#YI_RADICALS[YI_RADICALS] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#YI_SYLLABLES[YI_SYLLABLES] -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#forName(java.lang.String)[forName](null) -* static Character.UnicodeBlock {java11-javadoc}/java.base/java/lang/Character$UnicodeBlock.html#of(int)[of](int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Character-UnicodeScript]] -==== Character.UnicodeScript -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#ARABIC[ARABIC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#ARMENIAN[ARMENIAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#AVESTAN[AVESTAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#BALINESE[BALINESE] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#BAMUM[BAMUM] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#BATAK[BATAK] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#BENGALI[BENGALI] -* static 
Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#BOPOMOFO[BOPOMOFO] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#BRAHMI[BRAHMI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#BRAILLE[BRAILLE] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#BUGINESE[BUGINESE] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#BUHID[BUHID] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#CANADIAN_ABORIGINAL[CANADIAN_ABORIGINAL] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#CARIAN[CARIAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#CHAKMA[CHAKMA] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#CHAM[CHAM] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#CHEROKEE[CHEROKEE] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#COMMON[COMMON] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#COPTIC[COPTIC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#CUNEIFORM[CUNEIFORM] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#CYPRIOT[CYPRIOT] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#CYRILLIC[CYRILLIC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#DESERET[DESERET] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#DEVANAGARI[DEVANAGARI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#EGYPTIAN_HIEROGLYPHS[EGYPTIAN_HIEROGLYPHS] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#ETHIOPIC[ETHIOPIC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#GEORGIAN[GEORGIAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#GLAGOLITIC[GLAGOLITIC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#GOTHIC[GOTHIC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#GREEK[GREEK] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#GUJARATI[GUJARATI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#GURMUKHI[GURMUKHI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#HAN[HAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#HANGUL[HANGUL] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#HANUNOO[HANUNOO] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#HEBREW[HEBREW] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#HIRAGANA[HIRAGANA] -* 
static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#IMPERIAL_ARAMAIC[IMPERIAL_ARAMAIC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#INHERITED[INHERITED] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#INSCRIPTIONAL_PAHLAVI[INSCRIPTIONAL_PAHLAVI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#INSCRIPTIONAL_PARTHIAN[INSCRIPTIONAL_PARTHIAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#JAVANESE[JAVANESE] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#KAITHI[KAITHI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#KANNADA[KANNADA] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#KATAKANA[KATAKANA] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#KAYAH_LI[KAYAH_LI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#KHAROSHTHI[KHAROSHTHI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#KHMER[KHMER] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#LAO[LAO] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#LATIN[LATIN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#LEPCHA[LEPCHA] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#LIMBU[LIMBU] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#LINEAR_B[LINEAR_B] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#LISU[LISU] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#LYCIAN[LYCIAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#LYDIAN[LYDIAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#MALAYALAM[MALAYALAM] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#MANDAIC[MANDAIC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#MEETEI_MAYEK[MEETEI_MAYEK] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#MEROITIC_CURSIVE[MEROITIC_CURSIVE] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#MEROITIC_HIEROGLYPHS[MEROITIC_HIEROGLYPHS] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#MIAO[MIAO] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#MONGOLIAN[MONGOLIAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#MYANMAR[MYANMAR] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#NEW_TAI_LUE[NEW_TAI_LUE] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#NKO[NKO] -* static Character.UnicodeScript 
{java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#OGHAM[OGHAM] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#OLD_ITALIC[OLD_ITALIC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#OLD_PERSIAN[OLD_PERSIAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#OLD_SOUTH_ARABIAN[OLD_SOUTH_ARABIAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#OLD_TURKIC[OLD_TURKIC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#OL_CHIKI[OL_CHIKI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#ORIYA[ORIYA] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#OSMANYA[OSMANYA] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#PHAGS_PA[PHAGS_PA] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#PHOENICIAN[PHOENICIAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#REJANG[REJANG] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#RUNIC[RUNIC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#SAMARITAN[SAMARITAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#SAURASHTRA[SAURASHTRA] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#SHARADA[SHARADA] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#SHAVIAN[SHAVIAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#SINHALA[SINHALA] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#SORA_SOMPENG[SORA_SOMPENG] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#SUNDANESE[SUNDANESE] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#SYLOTI_NAGRI[SYLOTI_NAGRI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#SYRIAC[SYRIAC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#TAGALOG[TAGALOG] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#TAGBANWA[TAGBANWA] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#TAI_LE[TAI_LE] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#TAI_THAM[TAI_THAM] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#TAI_VIET[TAI_VIET] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#TAKRI[TAKRI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#TAMIL[TAMIL] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#TELUGU[TELUGU] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#THAANA[THAANA] -* static 
Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#THAI[THAI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#TIBETAN[TIBETAN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#TIFINAGH[TIFINAGH] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#UGARITIC[UGARITIC] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#UNKNOWN[UNKNOWN] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#VAI[VAI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#YI[YI] -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#forName(java.lang.String)[forName](null) -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#of(int)[of](int) -* static Character.UnicodeScript {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#valueOf(java.lang.String)[valueOf](null) -* static Character.UnicodeScript[] {java11-javadoc}/java.base/java/lang/Character$UnicodeScript.html#values()[values]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ClassCastException]] -==== ClassCastException -* {java11-javadoc}/java.base/java/lang/ClassCastException.html#()[ClassCastException]() -* {java11-javadoc}/java.base/java/lang/ClassCastException.html#(java.lang.String)[ClassCastException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ClassNotFoundException]] -==== ClassNotFoundException -* {java11-javadoc}/java.base/java/lang/ClassNotFoundException.html#()[ClassNotFoundException]() -* {java11-javadoc}/java.base/java/lang/ClassNotFoundException.html#(java.lang.String)[ClassNotFoundException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - 
-[[painless-api-reference-shared-CloneNotSupportedException]] -==== CloneNotSupportedException -* {java11-javadoc}/java.base/java/lang/CloneNotSupportedException.html#()[CloneNotSupportedException]() -* {java11-javadoc}/java.base/java/lang/CloneNotSupportedException.html#(java.lang.String)[CloneNotSupportedException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Comparable]] -==== Comparable -* int {java11-javadoc}/java.base/java/lang/Comparable.html#compareTo(java.lang.Object)[compareTo](def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Double]] -==== Double -* static int {java11-javadoc}/java.base/java/lang/Double.html#BYTES[BYTES] -* static int {java11-javadoc}/java.base/java/lang/Double.html#MAX_EXPONENT[MAX_EXPONENT] -* static double {java11-javadoc}/java.base/java/lang/Double.html#MAX_VALUE[MAX_VALUE] -* static int {java11-javadoc}/java.base/java/lang/Double.html#MIN_EXPONENT[MIN_EXPONENT] -* static double {java11-javadoc}/java.base/java/lang/Double.html#MIN_NORMAL[MIN_NORMAL] -* static double {java11-javadoc}/java.base/java/lang/Double.html#MIN_VALUE[MIN_VALUE] -* static double {java11-javadoc}/java.base/java/lang/Double.html#NEGATIVE_INFINITY[NEGATIVE_INFINITY] -* static double {java11-javadoc}/java.base/java/lang/Double.html#NaN[NaN] -* static double {java11-javadoc}/java.base/java/lang/Double.html#POSITIVE_INFINITY[POSITIVE_INFINITY] -* static int {java11-javadoc}/java.base/java/lang/Double.html#SIZE[SIZE] -* static int {java11-javadoc}/java.base/java/lang/Double.html#compare(double,double)[compare](double, double) -* static long {java11-javadoc}/java.base/java/lang/Double.html#doubleToLongBits(double)[doubleToLongBits](double) -* static long {java11-javadoc}/java.base/java/lang/Double.html#doubleToRawLongBits(double)[doubleToRawLongBits](double) -* static int {java11-javadoc}/java.base/java/lang/Double.html#hashCode(double)[hashCode](double) -* static boolean {java11-javadoc}/java.base/java/lang/Double.html#isFinite(double)[isFinite](double) -* static boolean {java11-javadoc}/java.base/java/lang/Double.html#isInfinite(double)[isInfinite](double) -* static boolean {java11-javadoc}/java.base/java/lang/Double.html#isNaN(double)[isNaN](double) -* static double {java11-javadoc}/java.base/java/lang/Double.html#longBitsToDouble(long)[longBitsToDouble](long) -* static double {java11-javadoc}/java.base/java/lang/Double.html#max(double,double)[max](double, double) -* static double {java11-javadoc}/java.base/java/lang/Double.html#min(double,double)[min](double, double) -* static double {java11-javadoc}/java.base/java/lang/Double.html#parseDouble(java.lang.String)[parseDouble](null) -* static double {java11-javadoc}/java.base/java/lang/Double.html#sum(double,double)[sum](double, double) -* static 
null {java11-javadoc}/java.base/java/lang/Double.html#toHexString(double)[toHexString](double) -* static null {java11-javadoc}/java.base/java/lang/Double.html#toString(double)[toString](double) -* static Double {java11-javadoc}/java.base/java/lang/Double.html#valueOf(double)[valueOf](double) -* byte {java11-javadoc}/java.base/java/lang/Number.html#byteValue()[byteValue]() -* int {java11-javadoc}/java.base/java/lang/Double.html#compareTo(java.lang.Double)[compareTo](Double) -* double {java11-javadoc}/java.base/java/lang/Number.html#doubleValue()[doubleValue]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* float {java11-javadoc}/java.base/java/lang/Number.html#floatValue()[floatValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/Number.html#intValue()[intValue]() -* boolean {java11-javadoc}/java.base/java/lang/Double.html#isInfinite()[isInfinite]() -* boolean {java11-javadoc}/java.base/java/lang/Double.html#isNaN()[isNaN]() -* long {java11-javadoc}/java.base/java/lang/Number.html#longValue()[longValue]() -* short {java11-javadoc}/java.base/java/lang/Number.html#shortValue()[shortValue]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Enum]] -==== Enum -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-EnumConstantNotPresentException]] -==== EnumConstantNotPresentException -* null {java11-javadoc}/java.base/java/lang/EnumConstantNotPresentException.html#constantName()[constantName]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Exception]] -==== Exception -* {java11-javadoc}/java.base/java/lang/Exception.html#()[Exception]() -* {java11-javadoc}/java.base/java/lang/Exception.html#(java.lang.String)[Exception](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Float]] -==== Float -* static int 
{java11-javadoc}/java.base/java/lang/Float.html#BYTES[BYTES] -* static int {java11-javadoc}/java.base/java/lang/Float.html#MAX_EXPONENT[MAX_EXPONENT] -* static float {java11-javadoc}/java.base/java/lang/Float.html#MAX_VALUE[MAX_VALUE] -* static int {java11-javadoc}/java.base/java/lang/Float.html#MIN_EXPONENT[MIN_EXPONENT] -* static float {java11-javadoc}/java.base/java/lang/Float.html#MIN_NORMAL[MIN_NORMAL] -* static float {java11-javadoc}/java.base/java/lang/Float.html#MIN_VALUE[MIN_VALUE] -* static float {java11-javadoc}/java.base/java/lang/Float.html#NEGATIVE_INFINITY[NEGATIVE_INFINITY] -* static float {java11-javadoc}/java.base/java/lang/Float.html#NaN[NaN] -* static float {java11-javadoc}/java.base/java/lang/Float.html#POSITIVE_INFINITY[POSITIVE_INFINITY] -* static int {java11-javadoc}/java.base/java/lang/Float.html#SIZE[SIZE] -* static int {java11-javadoc}/java.base/java/lang/Float.html#compare(float,float)[compare](float, float) -* static int {java11-javadoc}/java.base/java/lang/Float.html#floatToIntBits(float)[floatToIntBits](float) -* static int {java11-javadoc}/java.base/java/lang/Float.html#floatToRawIntBits(float)[floatToRawIntBits](float) -* static int {java11-javadoc}/java.base/java/lang/Float.html#hashCode(float)[hashCode](float) -* static float {java11-javadoc}/java.base/java/lang/Float.html#intBitsToFloat(int)[intBitsToFloat](int) -* static boolean {java11-javadoc}/java.base/java/lang/Float.html#isFinite(float)[isFinite](float) -* static boolean {java11-javadoc}/java.base/java/lang/Float.html#isInfinite(float)[isInfinite](float) -* static boolean {java11-javadoc}/java.base/java/lang/Float.html#isNaN(float)[isNaN](float) -* static float {java11-javadoc}/java.base/java/lang/Float.html#max(float,float)[max](float, float) -* static float {java11-javadoc}/java.base/java/lang/Float.html#min(float,float)[min](float, float) -* static float {java11-javadoc}/java.base/java/lang/Float.html#parseFloat(java.lang.String)[parseFloat](null) -* static float {java11-javadoc}/java.base/java/lang/Float.html#sum(float,float)[sum](float, float) -* static null {java11-javadoc}/java.base/java/lang/Float.html#toHexString(float)[toHexString](float) -* static null {java11-javadoc}/java.base/java/lang/Float.html#toString(float)[toString](float) -* static Float {java11-javadoc}/java.base/java/lang/Float.html#valueOf(float)[valueOf](float) -* byte {java11-javadoc}/java.base/java/lang/Number.html#byteValue()[byteValue]() -* int {java11-javadoc}/java.base/java/lang/Float.html#compareTo(java.lang.Float)[compareTo](Float) -* double {java11-javadoc}/java.base/java/lang/Number.html#doubleValue()[doubleValue]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* float {java11-javadoc}/java.base/java/lang/Number.html#floatValue()[floatValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/Number.html#intValue()[intValue]() -* boolean {java11-javadoc}/java.base/java/lang/Float.html#isInfinite()[isInfinite]() -* boolean {java11-javadoc}/java.base/java/lang/Float.html#isNaN()[isNaN]() -* long {java11-javadoc}/java.base/java/lang/Number.html#longValue()[longValue]() -* short {java11-javadoc}/java.base/java/lang/Number.html#shortValue()[shortValue]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IllegalAccessException]] -==== IllegalAccessException -* 
{java11-javadoc}/java.base/java/lang/IllegalAccessException.html#()[IllegalAccessException]() -* {java11-javadoc}/java.base/java/lang/IllegalAccessException.html#(java.lang.String)[IllegalAccessException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IllegalArgumentException]] -==== IllegalArgumentException -* {java11-javadoc}/java.base/java/lang/IllegalArgumentException.html#()[IllegalArgumentException]() -* {java11-javadoc}/java.base/java/lang/IllegalArgumentException.html#(java.lang.String)[IllegalArgumentException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IllegalMonitorStateException]] -==== IllegalMonitorStateException -* {java11-javadoc}/java.base/java/lang/IllegalMonitorStateException.html#()[IllegalMonitorStateException]() -* {java11-javadoc}/java.base/java/lang/IllegalMonitorStateException.html#(java.lang.String)[IllegalMonitorStateException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IllegalStateException]] -==== IllegalStateException -* {java11-javadoc}/java.base/java/lang/IllegalStateException.html#()[IllegalStateException]() -* {java11-javadoc}/java.base/java/lang/IllegalStateException.html#(java.lang.String)[IllegalStateException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IllegalThreadStateException]] -==== IllegalThreadStateException -* 
{java11-javadoc}/java.base/java/lang/IllegalThreadStateException.html#()[IllegalThreadStateException]() -* {java11-javadoc}/java.base/java/lang/IllegalThreadStateException.html#(java.lang.String)[IllegalThreadStateException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IndexOutOfBoundsException]] -==== IndexOutOfBoundsException -* {java11-javadoc}/java.base/java/lang/IndexOutOfBoundsException.html#()[IndexOutOfBoundsException]() -* {java11-javadoc}/java.base/java/lang/IndexOutOfBoundsException.html#(java.lang.String)[IndexOutOfBoundsException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-InstantiationException]] -==== InstantiationException -* {java11-javadoc}/java.base/java/lang/InstantiationException.html#()[InstantiationException]() -* {java11-javadoc}/java.base/java/lang/InstantiationException.html#(java.lang.String)[InstantiationException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Integer]] -==== Integer -* static int {java11-javadoc}/java.base/java/lang/Integer.html#BYTES[BYTES] -* static int {java11-javadoc}/java.base/java/lang/Integer.html#MAX_VALUE[MAX_VALUE] -* static int {java11-javadoc}/java.base/java/lang/Integer.html#MIN_VALUE[MIN_VALUE] -* static int {java11-javadoc}/java.base/java/lang/Integer.html#SIZE[SIZE] -* static int {java11-javadoc}/java.base/java/lang/Integer.html#bitCount(int)[bitCount](int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#compare(int,int)[compare](int, int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#compareUnsigned(int,int)[compareUnsigned](int, int) -* static Integer {java11-javadoc}/java.base/java/lang/Integer.html#decode(java.lang.String)[decode](null) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#divideUnsigned(int,int)[divideUnsigned](int, int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#hashCode(int)[hashCode](int) -* static 
int {java11-javadoc}/java.base/java/lang/Integer.html#highestOneBit(int)[highestOneBit](int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#lowestOneBit(int)[lowestOneBit](int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#max(int,int)[max](int, int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#min(int,int)[min](int, int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#numberOfLeadingZeros(int)[numberOfLeadingZeros](int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#numberOfTrailingZeros(int)[numberOfTrailingZeros](int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#parseInt(java.lang.String)[parseInt](null) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#parseInt(java.lang.String,int)[parseInt](null, int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#parseUnsignedInt(java.lang.String)[parseUnsignedInt](null) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#parseUnsignedInt(java.lang.String,int)[parseUnsignedInt](null, int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#remainderUnsigned(int,int)[remainderUnsigned](int, int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#reverse(int)[reverse](int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#reverseBytes(int)[reverseBytes](int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#rotateLeft(int,int)[rotateLeft](int, int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#rotateRight(int,int)[rotateRight](int, int) -* static int {java11-javadoc}/java.base/java/lang/Integer.html#signum(int)[signum](int) -* static null {java11-javadoc}/java.base/java/lang/Integer.html#toBinaryString(int)[toBinaryString](int) -* static null {java11-javadoc}/java.base/java/lang/Integer.html#toHexString(int)[toHexString](int) -* static null {java11-javadoc}/java.base/java/lang/Integer.html#toOctalString(int)[toOctalString](int) -* static null {java11-javadoc}/java.base/java/lang/Integer.html#toString(int)[toString](int) -* static null {java11-javadoc}/java.base/java/lang/Integer.html#toString(int,int)[toString](int, int) -* static long {java11-javadoc}/java.base/java/lang/Integer.html#toUnsignedLong(int)[toUnsignedLong](int) -* static null {java11-javadoc}/java.base/java/lang/Integer.html#toUnsignedString(int)[toUnsignedString](int) -* static null {java11-javadoc}/java.base/java/lang/Integer.html#toUnsignedString(int,int)[toUnsignedString](int, int) -* static Integer {java11-javadoc}/java.base/java/lang/Integer.html#valueOf(int)[valueOf](int) -* static Integer {java11-javadoc}/java.base/java/lang/Integer.html#valueOf(java.lang.String,int)[valueOf](null, int) -* byte {java11-javadoc}/java.base/java/lang/Number.html#byteValue()[byteValue]() -* int {java11-javadoc}/java.base/java/lang/Integer.html#compareTo(java.lang.Integer)[compareTo](Integer) -* double {java11-javadoc}/java.base/java/lang/Number.html#doubleValue()[doubleValue]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* float {java11-javadoc}/java.base/java/lang/Number.html#floatValue()[floatValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/Number.html#intValue()[intValue]() -* long {java11-javadoc}/java.base/java/lang/Number.html#longValue()[longValue]() -* short {java11-javadoc}/java.base/java/lang/Number.html#shortValue()[shortValue]() -* 
null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[[painless-api-reference-shared-InterruptedException]]
-==== InterruptedException
-* {java11-javadoc}/java.base/java/lang/InterruptedException.html#()[InterruptedException]()
-* {java11-javadoc}/java.base/java/lang/InterruptedException.html#(java.lang.String)[InterruptedException](null)
-* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object)
-* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]()
-* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]()
-* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]()
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[[painless-api-reference-shared-Iterable]]
-==== Iterable
-* boolean any(Predicate)
-* Collection asCollection()
-* List asList()
-* def each(Consumer)
-* def eachWithIndex(ObjIntConsumer)
-* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object)
-* boolean every(Predicate)
-* List findResults(Function)
-* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer)
-* Map groupBy(Function)
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]()
-* null join(null)
-* Spliterator {java11-javadoc}/java.base/java/lang/Iterable.html#spliterator()[spliterator]()
-* double sum()
-* double sum(ToDoubleFunction)
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[[painless-api-reference-shared-Long]]
-==== Long
-* static int {java11-javadoc}/java.base/java/lang/Long.html#BYTES[BYTES]
-* static long {java11-javadoc}/java.base/java/lang/Long.html#MAX_VALUE[MAX_VALUE]
-* static long {java11-javadoc}/java.base/java/lang/Long.html#MIN_VALUE[MIN_VALUE]
-* static int {java11-javadoc}/java.base/java/lang/Long.html#SIZE[SIZE]
-* static int {java11-javadoc}/java.base/java/lang/Long.html#bitCount(long)[bitCount](long)
-* static int {java11-javadoc}/java.base/java/lang/Long.html#compare(long,long)[compare](long, long)
-* static int {java11-javadoc}/java.base/java/lang/Long.html#compareUnsigned(long,long)[compareUnsigned](long, long)
-* static Long {java11-javadoc}/java.base/java/lang/Long.html#decode(java.lang.String)[decode](null)
-* static long {java11-javadoc}/java.base/java/lang/Long.html#divideUnsigned(long,long)[divideUnsigned](long, long)
-* static int {java11-javadoc}/java.base/java/lang/Long.html#hashCode(long)[hashCode](long)
-* static long {java11-javadoc}/java.base/java/lang/Long.html#highestOneBit(long)[highestOneBit](long)
-* static long {java11-javadoc}/java.base/java/lang/Long.html#lowestOneBit(long)[lowestOneBit](long)
-* static long {java11-javadoc}/java.base/java/lang/Long.html#max(long,long)[max](long, long)
-* static long {java11-javadoc}/java.base/java/lang/Long.html#min(long,long)[min](long, long)
-* static int {java11-javadoc}/java.base/java/lang/Long.html#numberOfLeadingZeros(long)[numberOfLeadingZeros](long)
-* static int {java11-javadoc}/java.base/java/lang/Long.html#numberOfTrailingZeros(long)[numberOfTrailingZeros](long)
-* static long {java11-javadoc}/java.base/java/lang/Long.html#parseLong(java.lang.String)[parseLong](null)
-* 
static long {java11-javadoc}/java.base/java/lang/Long.html#parseLong(java.lang.String,int)[parseLong](null, int) -* static long {java11-javadoc}/java.base/java/lang/Long.html#parseUnsignedLong(java.lang.String)[parseUnsignedLong](null) -* static long {java11-javadoc}/java.base/java/lang/Long.html#parseUnsignedLong(java.lang.String,int)[parseUnsignedLong](null, int) -* static long {java11-javadoc}/java.base/java/lang/Long.html#remainderUnsigned(long,long)[remainderUnsigned](long, long) -* static long {java11-javadoc}/java.base/java/lang/Long.html#reverse(long)[reverse](long) -* static long {java11-javadoc}/java.base/java/lang/Long.html#reverseBytes(long)[reverseBytes](long) -* static long {java11-javadoc}/java.base/java/lang/Long.html#rotateLeft(long,int)[rotateLeft](long, int) -* static long {java11-javadoc}/java.base/java/lang/Long.html#rotateRight(long,int)[rotateRight](long, int) -* static int {java11-javadoc}/java.base/java/lang/Long.html#signum(long)[signum](long) -* static long {java11-javadoc}/java.base/java/lang/Long.html#sum(long,long)[sum](long, long) -* static null {java11-javadoc}/java.base/java/lang/Long.html#toBinaryString(long)[toBinaryString](long) -* static null {java11-javadoc}/java.base/java/lang/Long.html#toHexString(long)[toHexString](long) -* static null {java11-javadoc}/java.base/java/lang/Long.html#toOctalString(long)[toOctalString](long) -* static null {java11-javadoc}/java.base/java/lang/Long.html#toString(long)[toString](long) -* static null {java11-javadoc}/java.base/java/lang/Long.html#toString(long,int)[toString](long, int) -* static null {java11-javadoc}/java.base/java/lang/Long.html#toUnsignedString(long)[toUnsignedString](long) -* static null {java11-javadoc}/java.base/java/lang/Long.html#toUnsignedString(long,int)[toUnsignedString](long, int) -* static Long {java11-javadoc}/java.base/java/lang/Long.html#valueOf(long)[valueOf](long) -* static Long {java11-javadoc}/java.base/java/lang/Long.html#valueOf(java.lang.String,int)[valueOf](null, int) -* byte {java11-javadoc}/java.base/java/lang/Number.html#byteValue()[byteValue]() -* int {java11-javadoc}/java.base/java/lang/Long.html#compareTo(java.lang.Long)[compareTo](Long) -* double {java11-javadoc}/java.base/java/lang/Number.html#doubleValue()[doubleValue]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* float {java11-javadoc}/java.base/java/lang/Number.html#floatValue()[floatValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/Number.html#intValue()[intValue]() -* long {java11-javadoc}/java.base/java/lang/Number.html#longValue()[longValue]() -* short {java11-javadoc}/java.base/java/lang/Number.html#shortValue()[shortValue]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Math]] -==== Math -* static double {java11-javadoc}/java.base/java/lang/Math.html#E[E] -* static double {java11-javadoc}/java.base/java/lang/Math.html#PI[PI] -* static double {java11-javadoc}/java.base/java/lang/Math.html#IEEEremainder(double,double)[IEEEremainder](double, double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#abs(double)[abs](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#acos(double)[acos](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#asin(double)[asin](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#atan(double)[atan](double) -* 
static double {java11-javadoc}/java.base/java/lang/Math.html#atan2(double,double)[atan2](double, double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#cbrt(double)[cbrt](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#ceil(double)[ceil](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#copySign(double,double)[copySign](double, double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#cos(double)[cos](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#cosh(double)[cosh](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#exp(double)[exp](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#expm1(double)[expm1](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#floor(double)[floor](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#hypot(double,double)[hypot](double, double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#log(double)[log](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#log10(double)[log10](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#log1p(double)[log1p](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#max(double,double)[max](double, double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#min(double,double)[min](double, double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#nextAfter(double,double)[nextAfter](double, double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#nextDown(double)[nextDown](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#nextUp(double)[nextUp](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#pow(double,double)[pow](double, double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#random()[random]() -* static double {java11-javadoc}/java.base/java/lang/Math.html#rint(double)[rint](double) -* static long {java11-javadoc}/java.base/java/lang/Math.html#round(double)[round](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#scalb(double,int)[scalb](double, int) -* static double {java11-javadoc}/java.base/java/lang/Math.html#signum(double)[signum](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#sin(double)[sin](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#sinh(double)[sinh](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#sqrt(double)[sqrt](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#tan(double)[tan](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#tanh(double)[tanh](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#toDegrees(double)[toDegrees](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#toRadians(double)[toRadians](double) -* static double {java11-javadoc}/java.base/java/lang/Math.html#ulp(double)[ulp](double) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-NegativeArraySizeException]] -==== NegativeArraySizeException -* {java11-javadoc}/java.base/java/lang/NegativeArraySizeException.html#()[NegativeArraySizeException]() -* 
{java11-javadoc}/java.base/java/lang/NegativeArraySizeException.html#(java.lang.String)[NegativeArraySizeException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-NoSuchFieldException]] -==== NoSuchFieldException -* {java11-javadoc}/java.base/java/lang/NoSuchFieldException.html#()[NoSuchFieldException]() -* {java11-javadoc}/java.base/java/lang/NoSuchFieldException.html#(java.lang.String)[NoSuchFieldException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-NoSuchMethodException]] -==== NoSuchMethodException -* {java11-javadoc}/java.base/java/lang/NoSuchMethodException.html#()[NoSuchMethodException]() -* {java11-javadoc}/java.base/java/lang/NoSuchMethodException.html#(java.lang.String)[NoSuchMethodException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-NullPointerException]] -==== NullPointerException -* {java11-javadoc}/java.base/java/lang/NullPointerException.html#()[NullPointerException]() -* {java11-javadoc}/java.base/java/lang/NullPointerException.html#(java.lang.String)[NullPointerException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Number]] -==== Number -* byte {java11-javadoc}/java.base/java/lang/Number.html#byteValue()[byteValue]() -* double {java11-javadoc}/java.base/java/lang/Number.html#doubleValue()[doubleValue]() -* boolean 
{java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* float {java11-javadoc}/java.base/java/lang/Number.html#floatValue()[floatValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/Number.html#intValue()[intValue]() -* long {java11-javadoc}/java.base/java/lang/Number.html#longValue()[longValue]() -* short {java11-javadoc}/java.base/java/lang/Number.html#shortValue()[shortValue]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-NumberFormatException]] -==== NumberFormatException -* {java11-javadoc}/java.base/java/lang/NumberFormatException.html#()[NumberFormatException]() -* {java11-javadoc}/java.base/java/lang/NumberFormatException.html#(java.lang.String)[NumberFormatException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Object]] -==== Object -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ReflectiveOperationException]] -==== ReflectiveOperationException -* {java11-javadoc}/java.base/java/lang/ReflectiveOperationException.html#()[ReflectiveOperationException]() -* {java11-javadoc}/java.base/java/lang/ReflectiveOperationException.html#(java.lang.String)[ReflectiveOperationException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-RuntimeException]] -==== RuntimeException -* {java11-javadoc}/java.base/java/lang/RuntimeException.html#()[RuntimeException]() -* {java11-javadoc}/java.base/java/lang/RuntimeException.html#(java.lang.String)[RuntimeException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-SecurityException]] -==== 
SecurityException -* {java11-javadoc}/java.base/java/lang/SecurityException.html#()[SecurityException]() -* {java11-javadoc}/java.base/java/lang/SecurityException.html#(java.lang.String)[SecurityException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Short]] -==== Short -* static int {java11-javadoc}/java.base/java/lang/Short.html#BYTES[BYTES] -* static short {java11-javadoc}/java.base/java/lang/Short.html#MAX_VALUE[MAX_VALUE] -* static short {java11-javadoc}/java.base/java/lang/Short.html#MIN_VALUE[MIN_VALUE] -* static int {java11-javadoc}/java.base/java/lang/Short.html#SIZE[SIZE] -* static int {java11-javadoc}/java.base/java/lang/Short.html#compare(short,short)[compare](short, short) -* static Short {java11-javadoc}/java.base/java/lang/Short.html#decode(java.lang.String)[decode](null) -* static int {java11-javadoc}/java.base/java/lang/Short.html#hashCode(short)[hashCode](short) -* static short {java11-javadoc}/java.base/java/lang/Short.html#parseShort(java.lang.String)[parseShort](null) -* static short {java11-javadoc}/java.base/java/lang/Short.html#parseShort(java.lang.String,int)[parseShort](null, int) -* static short {java11-javadoc}/java.base/java/lang/Short.html#reverseBytes(short)[reverseBytes](short) -* static null {java11-javadoc}/java.base/java/lang/Short.html#toString(short)[toString](short) -* static int {java11-javadoc}/java.base/java/lang/Short.html#toUnsignedInt(short)[toUnsignedInt](short) -* static long {java11-javadoc}/java.base/java/lang/Short.html#toUnsignedLong(short)[toUnsignedLong](short) -* static Short {java11-javadoc}/java.base/java/lang/Short.html#valueOf(short)[valueOf](short) -* static Short {java11-javadoc}/java.base/java/lang/Short.html#valueOf(java.lang.String,int)[valueOf](null, int) -* byte {java11-javadoc}/java.base/java/lang/Number.html#byteValue()[byteValue]() -* int {java11-javadoc}/java.base/java/lang/Short.html#compareTo(java.lang.Short)[compareTo](Short) -* double {java11-javadoc}/java.base/java/lang/Number.html#doubleValue()[doubleValue]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* float {java11-javadoc}/java.base/java/lang/Number.html#floatValue()[floatValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/Number.html#intValue()[intValue]() -* long {java11-javadoc}/java.base/java/lang/Number.html#longValue()[longValue]() -* short {java11-javadoc}/java.base/java/lang/Number.html#shortValue()[shortValue]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-StackTraceElement]] -==== StackTraceElement -* {java11-javadoc}/java.base/java/lang/StackTraceElement.html#(java.lang.String,java.lang.String,java.lang.String,int)[StackTraceElement](null, null, null, int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null 
{java11-javadoc}/java.base/java/lang/StackTraceElement.html#getClassName()[getClassName]() -* null {java11-javadoc}/java.base/java/lang/StackTraceElement.html#getFileName()[getFileName]() -* int {java11-javadoc}/java.base/java/lang/StackTraceElement.html#getLineNumber()[getLineNumber]() -* null {java11-javadoc}/java.base/java/lang/StackTraceElement.html#getMethodName()[getMethodName]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/lang/StackTraceElement.html#isNativeMethod()[isNativeMethod]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-StrictMath]] -==== StrictMath -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#E[E] -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#PI[PI] -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#IEEEremainder(double,double)[IEEEremainder](double, double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#abs(double)[abs](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#acos(double)[acos](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#asin(double)[asin](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#atan(double)[atan](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#atan2(double,double)[atan2](double, double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#cbrt(double)[cbrt](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#ceil(double)[ceil](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#copySign(double,double)[copySign](double, double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#cos(double)[cos](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#cosh(double)[cosh](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#exp(double)[exp](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#expm1(double)[expm1](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#floor(double)[floor](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#hypot(double,double)[hypot](double, double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#log(double)[log](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#log10(double)[log10](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#log1p(double)[log1p](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#max(double,double)[max](double, double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#min(double,double)[min](double, double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#nextAfter(double,double)[nextAfter](double, double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#nextDown(double)[nextDown](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#nextUp(double)[nextUp](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#pow(double,double)[pow](double, double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#random()[random]() -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#rint(double)[rint](double) 
-* static long {java11-javadoc}/java.base/java/lang/StrictMath.html#round(double)[round](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#scalb(double,int)[scalb](double, int) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#signum(double)[signum](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#sin(double)[sin](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#sinh(double)[sinh](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#sqrt(double)[sqrt](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#tan(double)[tan](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#tanh(double)[tanh](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#toDegrees(double)[toDegrees](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#toRadians(double)[toRadians](double) -* static double {java11-javadoc}/java.base/java/lang/StrictMath.html#ulp(double)[ulp](double) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-StringBuffer]] -==== StringBuffer -* {java11-javadoc}/java.base/java/lang/StringBuffer.html#()[StringBuffer]() -* {java11-javadoc}/java.base/java/lang/StringBuffer.html#(java.lang.CharSequence)[StringBuffer](CharSequence) -* StringBuffer {java11-javadoc}/java.base/java/lang/StringBuffer.html#append(java.lang.Object)[append](def) -* StringBuffer {java11-javadoc}/java.base/java/lang/StringBuffer.html#append(java.lang.CharSequence,int,int)[append](CharSequence, int, int) -* StringBuffer {java11-javadoc}/java.base/java/lang/StringBuffer.html#appendCodePoint(int)[appendCodePoint](int) -* int {java11-javadoc}/java.base/java/lang/StringBuffer.html#capacity()[capacity]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/StringBuffer.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/StringBuffer.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/StringBuffer.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/Comparable.html#compareTo(java.lang.Object)[compareTo](def) -* StringBuffer {java11-javadoc}/java.base/java/lang/StringBuffer.html#delete(int,int)[delete](int, int) -* StringBuffer {java11-javadoc}/java.base/java/lang/StringBuffer.html#deleteCharAt(int)[deleteCharAt](int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* void {java11-javadoc}/java.base/java/lang/StringBuffer.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/StringBuffer.html#indexOf(java.lang.String)[indexOf](null) -* int {java11-javadoc}/java.base/java/lang/StringBuffer.html#indexOf(java.lang.String,int)[indexOf](null, int) -* StringBuffer 
{java11-javadoc}/java.base/java/lang/StringBuffer.html#insert(int,java.lang.Object)[insert](int, def) -* int {java11-javadoc}/java.base/java/lang/StringBuffer.html#lastIndexOf(java.lang.String)[lastIndexOf](null) -* int {java11-javadoc}/java.base/java/lang/StringBuffer.html#lastIndexOf(java.lang.String,int)[lastIndexOf](null, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/StringBuffer.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* StringBuffer {java11-javadoc}/java.base/java/lang/StringBuffer.html#replace(int,int,java.lang.String)[replace](int, int, null) -* null replaceAll(Pattern, Function) -* null replaceFirst(Pattern, Function) -* StringBuffer {java11-javadoc}/java.base/java/lang/StringBuffer.html#reverse()[reverse]() -* void {java11-javadoc}/java.base/java/lang/StringBuffer.html#setCharAt(int,char)[setCharAt](int, char) -* void {java11-javadoc}/java.base/java/lang/StringBuffer.html#setLength(int)[setLength](int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* null {java11-javadoc}/java.base/java/lang/StringBuffer.html#substring(int)[substring](int) -* null {java11-javadoc}/java.base/java/lang/StringBuffer.html#substring(int,int)[substring](int, int) -* null {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() - - -[[painless-api-reference-shared-StringBuilder]] -==== StringBuilder -* {java11-javadoc}/java.base/java/lang/StringBuilder.html#()[StringBuilder]() -* {java11-javadoc}/java.base/java/lang/StringBuilder.html#(java.lang.CharSequence)[StringBuilder](CharSequence) -* StringBuilder {java11-javadoc}/java.base/java/lang/StringBuilder.html#append(java.lang.Object)[append](def) -* StringBuilder {java11-javadoc}/java.base/java/lang/StringBuilder.html#append(java.lang.CharSequence,int,int)[append](CharSequence, int, int) -* StringBuilder {java11-javadoc}/java.base/java/lang/StringBuilder.html#appendCodePoint(int)[appendCodePoint](int) -* int {java11-javadoc}/java.base/java/lang/StringBuilder.html#capacity()[capacity]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/StringBuilder.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/StringBuilder.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/StringBuilder.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/Comparable.html#compareTo(java.lang.Object)[compareTo](def) -* StringBuilder {java11-javadoc}/java.base/java/lang/StringBuilder.html#delete(int,int)[delete](int, int) -* StringBuilder {java11-javadoc}/java.base/java/lang/StringBuilder.html#deleteCharAt(int)[deleteCharAt](int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* void {java11-javadoc}/java.base/java/lang/StringBuilder.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/StringBuilder.html#indexOf(java.lang.String)[indexOf](null) -* int 
{java11-javadoc}/java.base/java/lang/StringBuilder.html#indexOf(java.lang.String,int)[indexOf](null, int) -* StringBuilder {java11-javadoc}/java.base/java/lang/StringBuilder.html#insert(int,java.lang.Object)[insert](int, def) -* int {java11-javadoc}/java.base/java/lang/StringBuilder.html#lastIndexOf(java.lang.String)[lastIndexOf](null) -* int {java11-javadoc}/java.base/java/lang/StringBuilder.html#lastIndexOf(java.lang.String,int)[lastIndexOf](null, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/StringBuilder.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* StringBuilder {java11-javadoc}/java.base/java/lang/StringBuilder.html#replace(int,int,java.lang.String)[replace](int, int, null) -* null replaceAll(Pattern, Function) -* null replaceFirst(Pattern, Function) -* StringBuilder {java11-javadoc}/java.base/java/lang/StringBuilder.html#reverse()[reverse]() -* void {java11-javadoc}/java.base/java/lang/StringBuilder.html#setCharAt(int,char)[setCharAt](int, char) -* void {java11-javadoc}/java.base/java/lang/StringBuilder.html#setLength(int)[setLength](int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* null {java11-javadoc}/java.base/java/lang/StringBuilder.html#substring(int)[substring](int) -* null {java11-javadoc}/java.base/java/lang/StringBuilder.html#substring(int,int)[substring](int, int) -* null {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() - - -[[painless-api-reference-shared-StringIndexOutOfBoundsException]] -==== StringIndexOutOfBoundsException -* {java11-javadoc}/java.base/java/lang/StringIndexOutOfBoundsException.html#()[StringIndexOutOfBoundsException]() -* {java11-javadoc}/java.base/java/lang/StringIndexOutOfBoundsException.html#(java.lang.String)[StringIndexOutOfBoundsException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-System]] -==== System -* static void {java11-javadoc}/java.base/java/lang/System.html#arraycopy(java.lang.Object,int,java.lang.Object,int,int)[arraycopy](Object, int, Object, int, int) -* static long {java11-javadoc}/java.base/java/lang/System.html#currentTimeMillis()[currentTimeMillis]() -* static long {java11-javadoc}/java.base/java/lang/System.html#nanoTime()[nanoTime]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-TypeNotPresentException]] -==== TypeNotPresentException -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* 
StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]()
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-* null {java11-javadoc}/java.base/java/lang/TypeNotPresentException.html#typeName()[typeName]()
-
-
-[[painless-api-reference-shared-UnsupportedOperationException]]
-==== UnsupportedOperationException
-* {java11-javadoc}/java.base/java/lang/UnsupportedOperationException.html#()[UnsupportedOperationException]()
-* {java11-javadoc}/java.base/java/lang/UnsupportedOperationException.html#(java.lang.String)[UnsupportedOperationException](null)
-* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object)
-* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]()
-* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]()
-* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]()
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[[painless-api-reference-shared-Void]]
-==== Void
-* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object)
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[role="exclude",id="painless-api-reference-shared-java-math"]
-=== Shared API for package java.math
-See the <> for a high-level overview of all packages and classes.
-
-[[painless-api-reference-shared-BigDecimal]]
-==== BigDecimal
-* static BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#ONE[ONE]
-* static BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#TEN[TEN]
-* static BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#ZERO[ZERO]
-* static BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#valueOf(double)[valueOf](double)
-* {java11-javadoc}/java.base/java/math/BigDecimal.html#(java.lang.String)[BigDecimal](null)
-* {java11-javadoc}/java.base/java/math/BigDecimal.html#(java.lang.String,java.math.MathContext)[BigDecimal](null, MathContext)
-* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#abs()[abs]()
-* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#abs(java.math.MathContext)[abs](MathContext)
-* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#add(java.math.BigDecimal)[add](BigDecimal)
-* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#add(java.math.BigDecimal,java.math.MathContext)[add](BigDecimal, MathContext)
-* byte {java11-javadoc}/java.base/java/lang/Number.html#byteValue()[byteValue]()
-* byte {java11-javadoc}/java.base/java/math/BigDecimal.html#byteValueExact()[byteValueExact]()
-* int {java11-javadoc}/java.base/java/math/BigDecimal.html#compareTo(java.math.BigDecimal)[compareTo](BigDecimal)
-* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#divide(java.math.BigDecimal)[divide](BigDecimal)
-* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#divide(java.math.BigDecimal,java.math.MathContext)[divide](BigDecimal, MathContext)
-* BigDecimal[] {java11-javadoc}/java.base/java/math/BigDecimal.html#divideAndRemainder(java.math.BigDecimal)[divideAndRemainder](BigDecimal)
-*
BigDecimal[] {java11-javadoc}/java.base/java/math/BigDecimal.html#divideAndRemainder(java.math.BigDecimal,java.math.MathContext)[divideAndRemainder](BigDecimal, MathContext) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#divideToIntegralValue(java.math.BigDecimal)[divideToIntegralValue](BigDecimal) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#divideToIntegralValue(java.math.BigDecimal,java.math.MathContext)[divideToIntegralValue](BigDecimal, MathContext) -* double {java11-javadoc}/java.base/java/lang/Number.html#doubleValue()[doubleValue]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* float {java11-javadoc}/java.base/java/lang/Number.html#floatValue()[floatValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/Number.html#intValue()[intValue]() -* int {java11-javadoc}/java.base/java/math/BigDecimal.html#intValueExact()[intValueExact]() -* long {java11-javadoc}/java.base/java/lang/Number.html#longValue()[longValue]() -* long {java11-javadoc}/java.base/java/math/BigDecimal.html#longValueExact()[longValueExact]() -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#max(java.math.BigDecimal)[max](BigDecimal) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#min(java.math.BigDecimal)[min](BigDecimal) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#movePointLeft(int)[movePointLeft](int) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#movePointRight(int)[movePointRight](int) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#multiply(java.math.BigDecimal)[multiply](BigDecimal) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#multiply(java.math.BigDecimal,java.math.MathContext)[multiply](BigDecimal, MathContext) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#negate()[negate]() -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#negate(java.math.MathContext)[negate](MathContext) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#plus()[plus]() -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#plus(java.math.MathContext)[plus](MathContext) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#pow(int)[pow](int) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#pow(int,java.math.MathContext)[pow](int, MathContext) -* int {java11-javadoc}/java.base/java/math/BigDecimal.html#precision()[precision]() -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#remainder(java.math.BigDecimal)[remainder](BigDecimal) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#remainder(java.math.BigDecimal,java.math.MathContext)[remainder](BigDecimal, MathContext) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#round(java.math.MathContext)[round](MathContext) -* int {java11-javadoc}/java.base/java/math/BigDecimal.html#scale()[scale]() -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#scaleByPowerOfTen(int)[scaleByPowerOfTen](int) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#setScale(int)[setScale](int) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#setScale(int,java.math.RoundingMode)[setScale](int, RoundingMode) -* short {java11-javadoc}/java.base/java/lang/Number.html#shortValue()[shortValue]() -* short 
{java11-javadoc}/java.base/java/math/BigDecimal.html#shortValueExact()[shortValueExact]() -* int {java11-javadoc}/java.base/java/math/BigDecimal.html#signum()[signum]() -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#stripTrailingZeros()[stripTrailingZeros]() -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#subtract(java.math.BigDecimal)[subtract](BigDecimal) -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#subtract(java.math.BigDecimal,java.math.MathContext)[subtract](BigDecimal, MathContext) -* BigInteger {java11-javadoc}/java.base/java/math/BigDecimal.html#toBigInteger()[toBigInteger]() -* BigInteger {java11-javadoc}/java.base/java/math/BigDecimal.html#toBigIntegerExact()[toBigIntegerExact]() -* null {java11-javadoc}/java.base/java/math/BigDecimal.html#toEngineeringString()[toEngineeringString]() -* null {java11-javadoc}/java.base/java/math/BigDecimal.html#toPlainString()[toPlainString]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* BigDecimal {java11-javadoc}/java.base/java/math/BigDecimal.html#ulp()[ulp]() - - -[[painless-api-reference-shared-BigInteger]] -==== BigInteger -* static BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#ONE[ONE] -* static BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#TEN[TEN] -* static BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#ZERO[ZERO] -* static BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#valueOf(long)[valueOf](long) -* {java11-javadoc}/java.base/java/math/BigInteger.html#(java.lang.String)[BigInteger](null) -* {java11-javadoc}/java.base/java/math/BigInteger.html#(java.lang.String,int)[BigInteger](null, int) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#abs()[abs]() -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#add(java.math.BigInteger)[add](BigInteger) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#and(java.math.BigInteger)[and](BigInteger) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#andNot(java.math.BigInteger)[andNot](BigInteger) -* int {java11-javadoc}/java.base/java/math/BigInteger.html#bitCount()[bitCount]() -* int {java11-javadoc}/java.base/java/math/BigInteger.html#bitLength()[bitLength]() -* byte {java11-javadoc}/java.base/java/lang/Number.html#byteValue()[byteValue]() -* byte {java11-javadoc}/java.base/java/math/BigInteger.html#byteValueExact()[byteValueExact]() -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#clearBit(int)[clearBit](int) -* int {java11-javadoc}/java.base/java/math/BigInteger.html#compareTo(java.math.BigInteger)[compareTo](BigInteger) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#divide(java.math.BigInteger)[divide](BigInteger) -* BigInteger[] {java11-javadoc}/java.base/java/math/BigInteger.html#divideAndRemainder(java.math.BigInteger)[divideAndRemainder](BigInteger) -* double {java11-javadoc}/java.base/java/lang/Number.html#doubleValue()[doubleValue]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#flipBit(int)[flipBit](int) -* float {java11-javadoc}/java.base/java/lang/Number.html#floatValue()[floatValue]() -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#gcd(java.math.BigInteger)[gcd](BigInteger) -* int {java11-javadoc}/java.base/java/math/BigInteger.html#getLowestSetBit()[getLowestSetBit]() -* int 
{java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/Number.html#intValue()[intValue]() -* int {java11-javadoc}/java.base/java/math/BigInteger.html#intValueExact()[intValueExact]() -* long {java11-javadoc}/java.base/java/lang/Number.html#longValue()[longValue]() -* long {java11-javadoc}/java.base/java/math/BigInteger.html#longValueExact()[longValueExact]() -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#max(java.math.BigInteger)[max](BigInteger) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#min(java.math.BigInteger)[min](BigInteger) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#mod(java.math.BigInteger)[mod](BigInteger) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#modInverse(java.math.BigInteger)[modInverse](BigInteger) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#modPow(java.math.BigInteger,java.math.BigInteger)[modPow](BigInteger, BigInteger) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#multiply(java.math.BigInteger)[multiply](BigInteger) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#negate()[negate]() -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#not()[not]() -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#or(java.math.BigInteger)[or](BigInteger) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#pow(int)[pow](int) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#remainder(java.math.BigInteger)[remainder](BigInteger) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#setBit(int)[setBit](int) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#shiftLeft(int)[shiftLeft](int) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#shiftRight(int)[shiftRight](int) -* short {java11-javadoc}/java.base/java/lang/Number.html#shortValue()[shortValue]() -* short {java11-javadoc}/java.base/java/math/BigInteger.html#shortValueExact()[shortValueExact]() -* int {java11-javadoc}/java.base/java/math/BigInteger.html#signum()[signum]() -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#subtract(java.math.BigInteger)[subtract](BigInteger) -* boolean {java11-javadoc}/java.base/java/math/BigInteger.html#testBit(int)[testBit](int) -* byte[] {java11-javadoc}/java.base/java/math/BigInteger.html#toByteArray()[toByteArray]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* null {java11-javadoc}/java.base/java/math/BigInteger.html#toString(int)[toString](int) -* BigInteger {java11-javadoc}/java.base/java/math/BigInteger.html#xor(java.math.BigInteger)[xor](BigInteger) - - -[[painless-api-reference-shared-MathContext]] -==== MathContext -* static MathContext {java11-javadoc}/java.base/java/math/MathContext.html#DECIMAL128[DECIMAL128] -* static MathContext {java11-javadoc}/java.base/java/math/MathContext.html#DECIMAL32[DECIMAL32] -* static MathContext {java11-javadoc}/java.base/java/math/MathContext.html#DECIMAL64[DECIMAL64] -* static MathContext {java11-javadoc}/java.base/java/math/MathContext.html#UNLIMITED[UNLIMITED] -* {java11-javadoc}/java.base/java/math/MathContext.html#(int)[MathContext](int) -* {java11-javadoc}/java.base/java/math/MathContext.html#(int,java.math.RoundingMode)[MathContext](int, RoundingMode) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int 
{java11-javadoc}/java.base/java/math/MathContext.html#getPrecision()[getPrecision]()
-* RoundingMode {java11-javadoc}/java.base/java/math/MathContext.html#getRoundingMode()[getRoundingMode]()
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[[painless-api-reference-shared-RoundingMode]]
-==== RoundingMode
-* static RoundingMode {java11-javadoc}/java.base/java/math/RoundingMode.html#CEILING[CEILING]
-* static RoundingMode {java11-javadoc}/java.base/java/math/RoundingMode.html#DOWN[DOWN]
-* static RoundingMode {java11-javadoc}/java.base/java/math/RoundingMode.html#FLOOR[FLOOR]
-* static RoundingMode {java11-javadoc}/java.base/java/math/RoundingMode.html#HALF_DOWN[HALF_DOWN]
-* static RoundingMode {java11-javadoc}/java.base/java/math/RoundingMode.html#HALF_EVEN[HALF_EVEN]
-* static RoundingMode {java11-javadoc}/java.base/java/math/RoundingMode.html#HALF_UP[HALF_UP]
-* static RoundingMode {java11-javadoc}/java.base/java/math/RoundingMode.html#UNNECESSARY[UNNECESSARY]
-* static RoundingMode {java11-javadoc}/java.base/java/math/RoundingMode.html#UP[UP]
-* static RoundingMode {java11-javadoc}/java.base/java/math/RoundingMode.html#valueOf(java.lang.String)[valueOf](null)
-* static RoundingMode[] {java11-javadoc}/java.base/java/math/RoundingMode.html#values()[values]()
-* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum)
-* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object)
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]()
-* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]()
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[role="exclude",id="painless-api-reference-shared-java-text"]
-=== Shared API for package java.text
-See the <> for a high-level overview of all packages and classes.
- -[[painless-api-reference-shared-Annotation]] -==== Annotation -* {java11-javadoc}/java.base/java/text/Annotation.html#(java.lang.Object)[Annotation](Object) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* def {java11-javadoc}/java.base/java/text/Annotation.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-AttributedCharacterIterator]] -==== AttributedCharacterIterator -* def {java11-javadoc}/java.base/java/text/CharacterIterator.html#clone()[clone]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#current()[current]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#first()[first]() -* Set {java11-javadoc}/java.base/java/text/AttributedCharacterIterator.html#getAllAttributeKeys()[getAllAttributeKeys]() -* def {java11-javadoc}/java.base/java/text/AttributedCharacterIterator.html#getAttribute(java.text.AttributedCharacterIterator$Attribute)[getAttribute](AttributedCharacterIterator.Attribute) -* Map {java11-javadoc}/java.base/java/text/AttributedCharacterIterator.html#getAttributes()[getAttributes]() -* int {java11-javadoc}/java.base/java/text/CharacterIterator.html#getBeginIndex()[getBeginIndex]() -* int {java11-javadoc}/java.base/java/text/CharacterIterator.html#getEndIndex()[getEndIndex]() -* int {java11-javadoc}/java.base/java/text/CharacterIterator.html#getIndex()[getIndex]() -* int {java11-javadoc}/java.base/java/text/AttributedCharacterIterator.html#getRunLimit()[getRunLimit]() -* int {java11-javadoc}/java.base/java/text/AttributedCharacterIterator.html#getRunLimit(java.util.Set)[getRunLimit](Set) -* int {java11-javadoc}/java.base/java/text/AttributedCharacterIterator.html#getRunStart()[getRunStart]() -* int {java11-javadoc}/java.base/java/text/AttributedCharacterIterator.html#getRunStart(java.util.Set)[getRunStart](Set) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#last()[last]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#next()[next]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#previous()[previous]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#setIndex(int)[setIndex](int) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-AttributedCharacterIterator-Attribute]] -==== AttributedCharacterIterator.Attribute -* static AttributedCharacterIterator.Attribute {java11-javadoc}/java.base/java/text/AttributedCharacterIterator$Attribute.html#INPUT_METHOD_SEGMENT[INPUT_METHOD_SEGMENT] -* static AttributedCharacterIterator.Attribute {java11-javadoc}/java.base/java/text/AttributedCharacterIterator$Attribute.html#LANGUAGE[LANGUAGE] -* static AttributedCharacterIterator.Attribute {java11-javadoc}/java.base/java/text/AttributedCharacterIterator$Attribute.html#READING[READING] -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-AttributedString]] -==== 
AttributedString -* {java11-javadoc}/java.base/java/text/AttributedString.html#(java.lang.String)[AttributedString](null) -* {java11-javadoc}/java.base/java/text/AttributedString.html#(java.lang.String,java.util.Map)[AttributedString](null, Map) -* void {java11-javadoc}/java.base/java/text/AttributedString.html#addAttribute(java.text.AttributedCharacterIterator$Attribute,java.lang.Object)[addAttribute](AttributedCharacterIterator.Attribute, Object) -* void {java11-javadoc}/java.base/java/text/AttributedString.html#addAttribute(java.text.AttributedCharacterIterator$Attribute,java.lang.Object,int,int)[addAttribute](AttributedCharacterIterator.Attribute, Object, int, int) -* void {java11-javadoc}/java.base/java/text/AttributedString.html#addAttributes(java.util.Map,int,int)[addAttributes](Map, int, int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* AttributedCharacterIterator {java11-javadoc}/java.base/java/text/AttributedString.html#getIterator()[getIterator]() -* AttributedCharacterIterator {java11-javadoc}/java.base/java/text/AttributedString.html#getIterator(java.text.AttributedCharacterIterator$Attribute%5B%5D)[getIterator](AttributedCharacterIterator.Attribute[]) -* AttributedCharacterIterator {java11-javadoc}/java.base/java/text/AttributedString.html#getIterator(java.text.AttributedCharacterIterator$Attribute%5B%5D,int,int)[getIterator](AttributedCharacterIterator.Attribute[], int, int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Bidi]] -==== Bidi -* static int {java11-javadoc}/java.base/java/text/Bidi.html#DIRECTION_DEFAULT_LEFT_TO_RIGHT[DIRECTION_DEFAULT_LEFT_TO_RIGHT] -* static int {java11-javadoc}/java.base/java/text/Bidi.html#DIRECTION_DEFAULT_RIGHT_TO_LEFT[DIRECTION_DEFAULT_RIGHT_TO_LEFT] -* static int {java11-javadoc}/java.base/java/text/Bidi.html#DIRECTION_LEFT_TO_RIGHT[DIRECTION_LEFT_TO_RIGHT] -* static int {java11-javadoc}/java.base/java/text/Bidi.html#DIRECTION_RIGHT_TO_LEFT[DIRECTION_RIGHT_TO_LEFT] -* static void {java11-javadoc}/java.base/java/text/Bidi.html#reorderVisually(byte%5B%5D,int,java.lang.Object%5B%5D,int,int)[reorderVisually](byte[], int, Object[], int, int) -* static boolean {java11-javadoc}/java.base/java/text/Bidi.html#requiresBidi(char%5B%5D,int,int)[requiresBidi](char[], int, int) -* {java11-javadoc}/java.base/java/text/Bidi.html#(java.text.AttributedCharacterIterator)[Bidi](AttributedCharacterIterator) -* {java11-javadoc}/java.base/java/text/Bidi.html#(java.lang.String,int)[Bidi](null, int) -* {java11-javadoc}/java.base/java/text/Bidi.html#(char%5B%5D,int,byte%5B%5D,int,int,int)[Bidi](char[], int, byte[], int, int, int) -* boolean {java11-javadoc}/java.base/java/text/Bidi.html#baseIsLeftToRight()[baseIsLeftToRight]() -* Bidi {java11-javadoc}/java.base/java/text/Bidi.html#createLineBidi(int,int)[createLineBidi](int, int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/text/Bidi.html#getBaseLevel()[getBaseLevel]() -* int {java11-javadoc}/java.base/java/text/Bidi.html#getLength()[getLength]() -* int {java11-javadoc}/java.base/java/text/Bidi.html#getLevelAt(int)[getLevelAt](int) -* int {java11-javadoc}/java.base/java/text/Bidi.html#getRunCount()[getRunCount]() -* int {java11-javadoc}/java.base/java/text/Bidi.html#getRunLevel(int)[getRunLevel](int) -* int 
{java11-javadoc}/java.base/java/text/Bidi.html#getRunLimit(int)[getRunLimit](int) -* int {java11-javadoc}/java.base/java/text/Bidi.html#getRunStart(int)[getRunStart](int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/text/Bidi.html#isLeftToRight()[isLeftToRight]() -* boolean {java11-javadoc}/java.base/java/text/Bidi.html#isMixed()[isMixed]() -* boolean {java11-javadoc}/java.base/java/text/Bidi.html#isRightToLeft()[isRightToLeft]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-BreakIterator]] -==== BreakIterator -* static int {java11-javadoc}/java.base/java/text/BreakIterator.html#DONE[DONE] -* static Locale[] {java11-javadoc}/java.base/java/text/BreakIterator.html#getAvailableLocales()[getAvailableLocales]() -* static BreakIterator {java11-javadoc}/java.base/java/text/BreakIterator.html#getCharacterInstance()[getCharacterInstance]() -* static BreakIterator {java11-javadoc}/java.base/java/text/BreakIterator.html#getCharacterInstance(java.util.Locale)[getCharacterInstance](Locale) -* static BreakIterator {java11-javadoc}/java.base/java/text/BreakIterator.html#getLineInstance()[getLineInstance]() -* static BreakIterator {java11-javadoc}/java.base/java/text/BreakIterator.html#getLineInstance(java.util.Locale)[getLineInstance](Locale) -* static BreakIterator {java11-javadoc}/java.base/java/text/BreakIterator.html#getSentenceInstance()[getSentenceInstance]() -* static BreakIterator {java11-javadoc}/java.base/java/text/BreakIterator.html#getSentenceInstance(java.util.Locale)[getSentenceInstance](Locale) -* static BreakIterator {java11-javadoc}/java.base/java/text/BreakIterator.html#getWordInstance()[getWordInstance]() -* static BreakIterator {java11-javadoc}/java.base/java/text/BreakIterator.html#getWordInstance(java.util.Locale)[getWordInstance](Locale) -* def {java11-javadoc}/java.base/java/text/BreakIterator.html#clone()[clone]() -* int {java11-javadoc}/java.base/java/text/BreakIterator.html#current()[current]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/text/BreakIterator.html#first()[first]() -* int {java11-javadoc}/java.base/java/text/BreakIterator.html#following(int)[following](int) -* CharacterIterator {java11-javadoc}/java.base/java/text/BreakIterator.html#getText()[getText]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/text/BreakIterator.html#isBoundary(int)[isBoundary](int) -* int {java11-javadoc}/java.base/java/text/BreakIterator.html#last()[last]() -* int {java11-javadoc}/java.base/java/text/BreakIterator.html#next()[next]() -* int {java11-javadoc}/java.base/java/text/BreakIterator.html#next(int)[next](int) -* int {java11-javadoc}/java.base/java/text/BreakIterator.html#preceding(int)[preceding](int) -* int {java11-javadoc}/java.base/java/text/BreakIterator.html#previous()[previous]() -* void {java11-javadoc}/java.base/java/text/BreakIterator.html#setText(java.lang.String)[setText](null) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-CharacterIterator]] -==== CharacterIterator -* static char {java11-javadoc}/java.base/java/text/CharacterIterator.html#DONE[DONE] -* def {java11-javadoc}/java.base/java/text/CharacterIterator.html#clone()[clone]() -* char 
{java11-javadoc}/java.base/java/text/CharacterIterator.html#current()[current]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#first()[first]() -* int {java11-javadoc}/java.base/java/text/CharacterIterator.html#getBeginIndex()[getBeginIndex]() -* int {java11-javadoc}/java.base/java/text/CharacterIterator.html#getEndIndex()[getEndIndex]() -* int {java11-javadoc}/java.base/java/text/CharacterIterator.html#getIndex()[getIndex]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#last()[last]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#next()[next]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#previous()[previous]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#setIndex(int)[setIndex](int) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ChoiceFormat]] -==== ChoiceFormat -* static double {java11-javadoc}/java.base/java/text/ChoiceFormat.html#nextDouble(double)[nextDouble](double) -* static double {java11-javadoc}/java.base/java/text/ChoiceFormat.html#nextDouble(double,boolean)[nextDouble](double, boolean) -* static double {java11-javadoc}/java.base/java/text/ChoiceFormat.html#previousDouble(double)[previousDouble](double) -* {java11-javadoc}/java.base/java/text/ChoiceFormat.html#(java.lang.String)[ChoiceFormat](null) -* {java11-javadoc}/java.base/java/text/ChoiceFormat.html#(double%5B%5D,java.lang.String%5B%5D)[ChoiceFormat](double[], null[]) -* void {java11-javadoc}/java.base/java/text/ChoiceFormat.html#applyPattern(java.lang.String)[applyPattern](null) -* def {java11-javadoc}/java.base/java/text/Format.html#clone()[clone]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object)[format](Object) -* StringBuffer {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object,java.lang.StringBuffer,java.text.FieldPosition)[format](Object, StringBuffer, FieldPosition) -* AttributedCharacterIterator {java11-javadoc}/java.base/java/text/Format.html#formatToCharacterIterator(java.lang.Object)[formatToCharacterIterator](Object) -* Currency {java11-javadoc}/java.base/java/text/NumberFormat.html#getCurrency()[getCurrency]() -* def[] {java11-javadoc}/java.base/java/text/ChoiceFormat.html#getFormats()[getFormats]() -* double[] {java11-javadoc}/java.base/java/text/ChoiceFormat.html#getLimits()[getLimits]() -* int {java11-javadoc}/java.base/java/text/NumberFormat.html#getMaximumFractionDigits()[getMaximumFractionDigits]() -* int {java11-javadoc}/java.base/java/text/NumberFormat.html#getMaximumIntegerDigits()[getMaximumIntegerDigits]() -* int {java11-javadoc}/java.base/java/text/NumberFormat.html#getMinimumFractionDigits()[getMinimumFractionDigits]() -* int {java11-javadoc}/java.base/java/text/NumberFormat.html#getMinimumIntegerDigits()[getMinimumIntegerDigits]() -* RoundingMode {java11-javadoc}/java.base/java/text/NumberFormat.html#getRoundingMode()[getRoundingMode]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/text/NumberFormat.html#isGroupingUsed()[isGroupingUsed]() -* boolean 
{java11-javadoc}/java.base/java/text/NumberFormat.html#isParseIntegerOnly()[isParseIntegerOnly]() -* Number {java11-javadoc}/java.base/java/text/NumberFormat.html#parse(java.lang.String)[parse](null) -* Number {java11-javadoc}/java.base/java/text/NumberFormat.html#parse(java.lang.String,java.text.ParsePosition)[parse](null, ParsePosition) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String)[parseObject](null) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String,java.text.ParsePosition)[parseObject](null, ParsePosition) -* void {java11-javadoc}/java.base/java/text/ChoiceFormat.html#setChoices(double%5B%5D,java.lang.String%5B%5D)[setChoices](double[], null[]) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setCurrency(java.util.Currency)[setCurrency](Currency) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setGroupingUsed(boolean)[setGroupingUsed](boolean) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setMaximumFractionDigits(int)[setMaximumFractionDigits](int) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setMaximumIntegerDigits(int)[setMaximumIntegerDigits](int) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setMinimumFractionDigits(int)[setMinimumFractionDigits](int) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setMinimumIntegerDigits(int)[setMinimumIntegerDigits](int) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setParseIntegerOnly(boolean)[setParseIntegerOnly](boolean) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setRoundingMode(java.math.RoundingMode)[setRoundingMode](RoundingMode) -* null {java11-javadoc}/java.base/java/text/ChoiceFormat.html#toPattern()[toPattern]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-CollationElementIterator]] -==== CollationElementIterator -* static int {java11-javadoc}/java.base/java/text/CollationElementIterator.html#NULLORDER[NULLORDER] -* static int {java11-javadoc}/java.base/java/text/CollationElementIterator.html#primaryOrder(int)[primaryOrder](int) -* static short {java11-javadoc}/java.base/java/text/CollationElementIterator.html#secondaryOrder(int)[secondaryOrder](int) -* static short {java11-javadoc}/java.base/java/text/CollationElementIterator.html#tertiaryOrder(int)[tertiaryOrder](int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/text/CollationElementIterator.html#getMaxExpansion(int)[getMaxExpansion](int) -* int {java11-javadoc}/java.base/java/text/CollationElementIterator.html#getOffset()[getOffset]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/text/CollationElementIterator.html#next()[next]() -* int {java11-javadoc}/java.base/java/text/CollationElementIterator.html#previous()[previous]() -* void {java11-javadoc}/java.base/java/text/CollationElementIterator.html#reset()[reset]() -* void {java11-javadoc}/java.base/java/text/CollationElementIterator.html#setOffset(int)[setOffset](int) -* void {java11-javadoc}/java.base/java/text/CollationElementIterator.html#setText(java.lang.String)[setText](null) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-CollationKey]] -==== CollationKey -* int 
{java11-javadoc}/java.base/java/text/CollationKey.html#compareTo(java.text.CollationKey)[compareTo](CollationKey) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/text/CollationKey.html#getSourceString()[getSourceString]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* byte[] {java11-javadoc}/java.base/java/text/CollationKey.html#toByteArray()[toByteArray]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Collator]] -==== Collator -* static int {java11-javadoc}/java.base/java/text/Collator.html#CANONICAL_DECOMPOSITION[CANONICAL_DECOMPOSITION] -* static int {java11-javadoc}/java.base/java/text/Collator.html#FULL_DECOMPOSITION[FULL_DECOMPOSITION] -* static int {java11-javadoc}/java.base/java/text/Collator.html#IDENTICAL[IDENTICAL] -* static int {java11-javadoc}/java.base/java/text/Collator.html#NO_DECOMPOSITION[NO_DECOMPOSITION] -* static int {java11-javadoc}/java.base/java/text/Collator.html#PRIMARY[PRIMARY] -* static int {java11-javadoc}/java.base/java/text/Collator.html#SECONDARY[SECONDARY] -* static int {java11-javadoc}/java.base/java/text/Collator.html#TERTIARY[TERTIARY] -* static Locale[] {java11-javadoc}/java.base/java/text/Collator.html#getAvailableLocales()[getAvailableLocales]() -* static Collator {java11-javadoc}/java.base/java/text/Collator.html#getInstance()[getInstance]() -* static Collator {java11-javadoc}/java.base/java/text/Collator.html#getInstance(java.util.Locale)[getInstance](Locale) -* def {java11-javadoc}/java.base/java/text/Collator.html#clone()[clone]() -* int {java11-javadoc}/java.base/java/util/Comparator.html#compare(java.lang.Object,java.lang.Object)[compare](def, def) -* boolean {java11-javadoc}/java.base/java/util/Comparator.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/text/Collator.html#equals(java.lang.String,java.lang.String)[equals](null, null) -* CollationKey {java11-javadoc}/java.base/java/text/Collator.html#getCollationKey(java.lang.String)[getCollationKey](null) -* int {java11-javadoc}/java.base/java/text/Collator.html#getDecomposition()[getDecomposition]() -* int {java11-javadoc}/java.base/java/text/Collator.html#getStrength()[getStrength]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#reversed()[reversed]() -* void {java11-javadoc}/java.base/java/text/Collator.html#setDecomposition(int)[setDecomposition](int) -* void {java11-javadoc}/java.base/java/text/Collator.html#setStrength(int)[setStrength](int) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparing(java.util.Comparator)[thenComparing](Comparator) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparing(java.util.function.Function,java.util.Comparator)[thenComparing](Function, Comparator) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparingDouble(java.util.function.ToDoubleFunction)[thenComparingDouble](ToDoubleFunction) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparingInt(java.util.function.ToIntFunction)[thenComparingInt](ToIntFunction) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparingLong(java.util.function.ToLongFunction)[thenComparingLong](ToLongFunction) -* null 
{java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DateFormat]] -==== DateFormat -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#AM_PM_FIELD[AM_PM_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#DATE_FIELD[DATE_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#DAY_OF_WEEK_FIELD[DAY_OF_WEEK_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#DAY_OF_WEEK_IN_MONTH_FIELD[DAY_OF_WEEK_IN_MONTH_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#DAY_OF_YEAR_FIELD[DAY_OF_YEAR_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#DEFAULT[DEFAULT] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#ERA_FIELD[ERA_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#FULL[FULL] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#HOUR0_FIELD[HOUR0_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#HOUR1_FIELD[HOUR1_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#HOUR_OF_DAY0_FIELD[HOUR_OF_DAY0_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#HOUR_OF_DAY1_FIELD[HOUR_OF_DAY1_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#LONG[LONG] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#MEDIUM[MEDIUM] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#MILLISECOND_FIELD[MILLISECOND_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#MINUTE_FIELD[MINUTE_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#MONTH_FIELD[MONTH_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#SECOND_FIELD[SECOND_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#SHORT[SHORT] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#TIMEZONE_FIELD[TIMEZONE_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#WEEK_OF_MONTH_FIELD[WEEK_OF_MONTH_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#WEEK_OF_YEAR_FIELD[WEEK_OF_YEAR_FIELD] -* static int {java11-javadoc}/java.base/java/text/DateFormat.html#YEAR_FIELD[YEAR_FIELD] -* static Locale[] {java11-javadoc}/java.base/java/text/DateFormat.html#getAvailableLocales()[getAvailableLocales]() -* static DateFormat {java11-javadoc}/java.base/java/text/DateFormat.html#getDateInstance()[getDateInstance]() -* static DateFormat {java11-javadoc}/java.base/java/text/DateFormat.html#getDateInstance(int)[getDateInstance](int) -* static DateFormat {java11-javadoc}/java.base/java/text/DateFormat.html#getDateInstance(int,java.util.Locale)[getDateInstance](int, Locale) -* static DateFormat {java11-javadoc}/java.base/java/text/DateFormat.html#getDateTimeInstance()[getDateTimeInstance]() -* static DateFormat {java11-javadoc}/java.base/java/text/DateFormat.html#getDateTimeInstance(int,int)[getDateTimeInstance](int, int) -* static DateFormat {java11-javadoc}/java.base/java/text/DateFormat.html#getDateTimeInstance(int,int,java.util.Locale)[getDateTimeInstance](int, int, Locale) -* static DateFormat {java11-javadoc}/java.base/java/text/DateFormat.html#getInstance()[getInstance]() -* static DateFormat {java11-javadoc}/java.base/java/text/DateFormat.html#getTimeInstance()[getTimeInstance]() -* static DateFormat {java11-javadoc}/java.base/java/text/DateFormat.html#getTimeInstance(int)[getTimeInstance](int) 
-* static DateFormat {java11-javadoc}/java.base/java/text/DateFormat.html#getTimeInstance(int,java.util.Locale)[getTimeInstance](int, Locale) -* def {java11-javadoc}/java.base/java/text/Format.html#clone()[clone]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object)[format](Object) -* StringBuffer {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object,java.lang.StringBuffer,java.text.FieldPosition)[format](Object, StringBuffer, FieldPosition) -* AttributedCharacterIterator {java11-javadoc}/java.base/java/text/Format.html#formatToCharacterIterator(java.lang.Object)[formatToCharacterIterator](Object) -* Calendar {java11-javadoc}/java.base/java/text/DateFormat.html#getCalendar()[getCalendar]() -* NumberFormat {java11-javadoc}/java.base/java/text/DateFormat.html#getNumberFormat()[getNumberFormat]() -* TimeZone {java11-javadoc}/java.base/java/text/DateFormat.html#getTimeZone()[getTimeZone]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/text/DateFormat.html#isLenient()[isLenient]() -* Date {java11-javadoc}/java.base/java/text/DateFormat.html#parse(java.lang.String)[parse](null) -* Date {java11-javadoc}/java.base/java/text/DateFormat.html#parse(java.lang.String,java.text.ParsePosition)[parse](null, ParsePosition) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String)[parseObject](null) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String,java.text.ParsePosition)[parseObject](null, ParsePosition) -* void {java11-javadoc}/java.base/java/text/DateFormat.html#setCalendar(java.util.Calendar)[setCalendar](Calendar) -* void {java11-javadoc}/java.base/java/text/DateFormat.html#setLenient(boolean)[setLenient](boolean) -* void {java11-javadoc}/java.base/java/text/DateFormat.html#setNumberFormat(java.text.NumberFormat)[setNumberFormat](NumberFormat) -* void {java11-javadoc}/java.base/java/text/DateFormat.html#setTimeZone(java.util.TimeZone)[setTimeZone](TimeZone) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DateFormat-Field]] -==== DateFormat.Field -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#AM_PM[AM_PM] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#DAY_OF_MONTH[DAY_OF_MONTH] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#DAY_OF_WEEK[DAY_OF_WEEK] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#DAY_OF_WEEK_IN_MONTH[DAY_OF_WEEK_IN_MONTH] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#DAY_OF_YEAR[DAY_OF_YEAR] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#ERA[ERA] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#HOUR0[HOUR0] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#HOUR1[HOUR1] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#HOUR_OF_DAY0[HOUR_OF_DAY0] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#HOUR_OF_DAY1[HOUR_OF_DAY1] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#MILLISECOND[MILLISECOND] -* static DateFormat.Field 
{java11-javadoc}/java.base/java/text/DateFormat$Field.html#MINUTE[MINUTE] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#MONTH[MONTH] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#SECOND[SECOND] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#TIME_ZONE[TIME_ZONE] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#WEEK_OF_MONTH[WEEK_OF_MONTH] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#WEEK_OF_YEAR[WEEK_OF_YEAR] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#YEAR[YEAR] -* static DateFormat.Field {java11-javadoc}/java.base/java/text/DateFormat$Field.html#ofCalendarField(int)[ofCalendarField](int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/text/DateFormat$Field.html#getCalendarField()[getCalendarField]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DateFormatSymbols]] -==== DateFormatSymbols -* static Locale[] {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#getAvailableLocales()[getAvailableLocales]() -* static DateFormatSymbols {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#getInstance()[getInstance]() -* static DateFormatSymbols {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#getInstance(java.util.Locale)[getInstance](Locale) -* {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#()[DateFormatSymbols]() -* {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#(java.util.Locale)[DateFormatSymbols](Locale) -* def {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#clone()[clone]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null[] {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#getAmPmStrings()[getAmPmStrings]() -* null[] {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#getEras()[getEras]() -* null {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#getLocalPatternChars()[getLocalPatternChars]() -* null[] {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#getMonths()[getMonths]() -* null[] {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#getShortMonths()[getShortMonths]() -* null[] {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#getShortWeekdays()[getShortWeekdays]() -* null[] {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#getWeekdays()[getWeekdays]() -* null[][] {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#getZoneStrings()[getZoneStrings]() -* int {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#hashCode()[hashCode]() -* void {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#setAmPmStrings(java.lang.String%5B%5D)[setAmPmStrings](null[]) -* void {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#setEras(java.lang.String%5B%5D)[setEras](null[]) -* void {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#setLocalPatternChars(java.lang.String)[setLocalPatternChars](null) -* void {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#setMonths(java.lang.String%5B%5D)[setMonths](null[]) -* void 
{java11-javadoc}/java.base/java/text/DateFormatSymbols.html#setShortMonths(java.lang.String%5B%5D)[setShortMonths](null[]) -* void {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#setShortWeekdays(java.lang.String%5B%5D)[setShortWeekdays](null[]) -* void {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#setWeekdays(java.lang.String%5B%5D)[setWeekdays](null[]) -* void {java11-javadoc}/java.base/java/text/DateFormatSymbols.html#setZoneStrings(java.lang.String%5B%5D%5B%5D)[setZoneStrings](null[][]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DecimalFormat]] -==== DecimalFormat -* {java11-javadoc}/java.base/java/text/DecimalFormat.html#()[DecimalFormat]() -* {java11-javadoc}/java.base/java/text/DecimalFormat.html#(java.lang.String)[DecimalFormat](null) -* {java11-javadoc}/java.base/java/text/DecimalFormat.html#(java.lang.String,java.text.DecimalFormatSymbols)[DecimalFormat](null, DecimalFormatSymbols) -* void {java11-javadoc}/java.base/java/text/DecimalFormat.html#applyLocalizedPattern(java.lang.String)[applyLocalizedPattern](null) -* void {java11-javadoc}/java.base/java/text/DecimalFormat.html#applyPattern(java.lang.String)[applyPattern](null) -* def {java11-javadoc}/java.base/java/text/Format.html#clone()[clone]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object)[format](Object) -* StringBuffer {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object,java.lang.StringBuffer,java.text.FieldPosition)[format](Object, StringBuffer, FieldPosition) -* AttributedCharacterIterator {java11-javadoc}/java.base/java/text/Format.html#formatToCharacterIterator(java.lang.Object)[formatToCharacterIterator](Object) -* Currency {java11-javadoc}/java.base/java/text/NumberFormat.html#getCurrency()[getCurrency]() -* DecimalFormatSymbols {java11-javadoc}/java.base/java/text/DecimalFormat.html#getDecimalFormatSymbols()[getDecimalFormatSymbols]() -* int {java11-javadoc}/java.base/java/text/DecimalFormat.html#getGroupingSize()[getGroupingSize]() -* int {java11-javadoc}/java.base/java/text/NumberFormat.html#getMaximumFractionDigits()[getMaximumFractionDigits]() -* int {java11-javadoc}/java.base/java/text/NumberFormat.html#getMaximumIntegerDigits()[getMaximumIntegerDigits]() -* int {java11-javadoc}/java.base/java/text/NumberFormat.html#getMinimumFractionDigits()[getMinimumFractionDigits]() -* int {java11-javadoc}/java.base/java/text/NumberFormat.html#getMinimumIntegerDigits()[getMinimumIntegerDigits]() -* int {java11-javadoc}/java.base/java/text/DecimalFormat.html#getMultiplier()[getMultiplier]() -* null {java11-javadoc}/java.base/java/text/DecimalFormat.html#getNegativePrefix()[getNegativePrefix]() -* null {java11-javadoc}/java.base/java/text/DecimalFormat.html#getNegativeSuffix()[getNegativeSuffix]() -* null {java11-javadoc}/java.base/java/text/DecimalFormat.html#getPositivePrefix()[getPositivePrefix]() -* null {java11-javadoc}/java.base/java/text/DecimalFormat.html#getPositiveSuffix()[getPositiveSuffix]() -* RoundingMode {java11-javadoc}/java.base/java/text/NumberFormat.html#getRoundingMode()[getRoundingMode]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/text/DecimalFormat.html#isDecimalSeparatorAlwaysShown()[isDecimalSeparatorAlwaysShown]() -* boolean 
{java11-javadoc}/java.base/java/text/NumberFormat.html#isGroupingUsed()[isGroupingUsed]() -* boolean {java11-javadoc}/java.base/java/text/DecimalFormat.html#isParseBigDecimal()[isParseBigDecimal]() -* boolean {java11-javadoc}/java.base/java/text/NumberFormat.html#isParseIntegerOnly()[isParseIntegerOnly]() -* Number {java11-javadoc}/java.base/java/text/NumberFormat.html#parse(java.lang.String)[parse](null) -* Number {java11-javadoc}/java.base/java/text/NumberFormat.html#parse(java.lang.String,java.text.ParsePosition)[parse](null, ParsePosition) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String)[parseObject](null) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String,java.text.ParsePosition)[parseObject](null, ParsePosition) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setCurrency(java.util.Currency)[setCurrency](Currency) -* void {java11-javadoc}/java.base/java/text/DecimalFormat.html#setDecimalFormatSymbols(java.text.DecimalFormatSymbols)[setDecimalFormatSymbols](DecimalFormatSymbols) -* void {java11-javadoc}/java.base/java/text/DecimalFormat.html#setDecimalSeparatorAlwaysShown(boolean)[setDecimalSeparatorAlwaysShown](boolean) -* void {java11-javadoc}/java.base/java/text/DecimalFormat.html#setGroupingSize(int)[setGroupingSize](int) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setGroupingUsed(boolean)[setGroupingUsed](boolean) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setMaximumFractionDigits(int)[setMaximumFractionDigits](int) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setMaximumIntegerDigits(int)[setMaximumIntegerDigits](int) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setMinimumFractionDigits(int)[setMinimumFractionDigits](int) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setMinimumIntegerDigits(int)[setMinimumIntegerDigits](int) -* void {java11-javadoc}/java.base/java/text/DecimalFormat.html#setMultiplier(int)[setMultiplier](int) -* void {java11-javadoc}/java.base/java/text/DecimalFormat.html#setNegativePrefix(java.lang.String)[setNegativePrefix](null) -* void {java11-javadoc}/java.base/java/text/DecimalFormat.html#setNegativeSuffix(java.lang.String)[setNegativeSuffix](null) -* void {java11-javadoc}/java.base/java/text/DecimalFormat.html#setParseBigDecimal(boolean)[setParseBigDecimal](boolean) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setParseIntegerOnly(boolean)[setParseIntegerOnly](boolean) -* void {java11-javadoc}/java.base/java/text/DecimalFormat.html#setPositivePrefix(java.lang.String)[setPositivePrefix](null) -* void {java11-javadoc}/java.base/java/text/DecimalFormat.html#setPositiveSuffix(java.lang.String)[setPositiveSuffix](null) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setRoundingMode(java.math.RoundingMode)[setRoundingMode](RoundingMode) -* null {java11-javadoc}/java.base/java/text/DecimalFormat.html#toLocalizedPattern()[toLocalizedPattern]() -* null {java11-javadoc}/java.base/java/text/DecimalFormat.html#toPattern()[toPattern]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DecimalFormatSymbols]] -==== DecimalFormatSymbols -* static Locale[] {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getAvailableLocales()[getAvailableLocales]() -* static DecimalFormatSymbols {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getInstance()[getInstance]() -* static 
DecimalFormatSymbols {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getInstance(java.util.Locale)[getInstance](Locale) -* {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#()[DecimalFormatSymbols]() -* {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#(java.util.Locale)[DecimalFormatSymbols](Locale) -* def {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#clone()[clone]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* Currency {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getCurrency()[getCurrency]() -* null {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getCurrencySymbol()[getCurrencySymbol]() -* char {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getDecimalSeparator()[getDecimalSeparator]() -* char {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getDigit()[getDigit]() -* null {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getExponentSeparator()[getExponentSeparator]() -* char {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getGroupingSeparator()[getGroupingSeparator]() -* null {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getInfinity()[getInfinity]() -* null {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getInternationalCurrencySymbol()[getInternationalCurrencySymbol]() -* char {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getMinusSign()[getMinusSign]() -* char {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getMonetaryDecimalSeparator()[getMonetaryDecimalSeparator]() -* null {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getNaN()[getNaN]() -* char {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getPatternSeparator()[getPatternSeparator]() -* char {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getPerMill()[getPerMill]() -* char {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getPercent()[getPercent]() -* char {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#getZeroDigit()[getZeroDigit]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setCurrency(java.util.Currency)[setCurrency](Currency) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setCurrencySymbol(java.lang.String)[setCurrencySymbol](null) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setDecimalSeparator(char)[setDecimalSeparator](char) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setDigit(char)[setDigit](char) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setExponentSeparator(java.lang.String)[setExponentSeparator](null) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setGroupingSeparator(char)[setGroupingSeparator](char) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setInfinity(java.lang.String)[setInfinity](null) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setInternationalCurrencySymbol(java.lang.String)[setInternationalCurrencySymbol](null) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setMinusSign(char)[setMinusSign](char) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setMonetaryDecimalSeparator(char)[setMonetaryDecimalSeparator](char) -* void 
{java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setNaN(java.lang.String)[setNaN](null) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setPatternSeparator(char)[setPatternSeparator](char) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setPerMill(char)[setPerMill](char) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setPercent(char)[setPercent](char) -* void {java11-javadoc}/java.base/java/text/DecimalFormatSymbols.html#setZeroDigit(char)[setZeroDigit](char) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-FieldPosition]] -==== FieldPosition -* {java11-javadoc}/java.base/java/text/FieldPosition.html#(int)[FieldPosition](int) -* {java11-javadoc}/java.base/java/text/FieldPosition.html#(java.text.Format$Field,int)[FieldPosition](Format.Field, int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/text/FieldPosition.html#getBeginIndex()[getBeginIndex]() -* int {java11-javadoc}/java.base/java/text/FieldPosition.html#getEndIndex()[getEndIndex]() -* int {java11-javadoc}/java.base/java/text/FieldPosition.html#getField()[getField]() -* Format.Field {java11-javadoc}/java.base/java/text/FieldPosition.html#getFieldAttribute()[getFieldAttribute]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* void {java11-javadoc}/java.base/java/text/FieldPosition.html#setBeginIndex(int)[setBeginIndex](int) -* void {java11-javadoc}/java.base/java/text/FieldPosition.html#setEndIndex(int)[setEndIndex](int) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Format]] -==== Format -* def {java11-javadoc}/java.base/java/text/Format.html#clone()[clone]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object)[format](Object) -* StringBuffer {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object,java.lang.StringBuffer,java.text.FieldPosition)[format](Object, StringBuffer, FieldPosition) -* AttributedCharacterIterator {java11-javadoc}/java.base/java/text/Format.html#formatToCharacterIterator(java.lang.Object)[formatToCharacterIterator](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String)[parseObject](null) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String,java.text.ParsePosition)[parseObject](null, ParsePosition) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Format-Field]] -==== Format.Field -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-MessageFormat]] -==== MessageFormat -* static null {java11-javadoc}/java.base/java/text/MessageFormat.html#format(java.lang.String,java.lang.Object%5B%5D)[format](null, Object[]) -* void {java11-javadoc}/java.base/java/text/MessageFormat.html#applyPattern(java.lang.String)[applyPattern](null) -* def 
{java11-javadoc}/java.base/java/text/Format.html#clone()[clone]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object)[format](Object) -* StringBuffer {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object,java.lang.StringBuffer,java.text.FieldPosition)[format](Object, StringBuffer, FieldPosition) -* AttributedCharacterIterator {java11-javadoc}/java.base/java/text/Format.html#formatToCharacterIterator(java.lang.Object)[formatToCharacterIterator](Object) -* Format[] {java11-javadoc}/java.base/java/text/MessageFormat.html#getFormats()[getFormats]() -* Format[] {java11-javadoc}/java.base/java/text/MessageFormat.html#getFormatsByArgumentIndex()[getFormatsByArgumentIndex]() -* Locale {java11-javadoc}/java.base/java/text/MessageFormat.html#getLocale()[getLocale]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Object[] {java11-javadoc}/java.base/java/text/MessageFormat.html#parse(java.lang.String)[parse](null) -* Object[] {java11-javadoc}/java.base/java/text/MessageFormat.html#parse(java.lang.String,java.text.ParsePosition)[parse](null, ParsePosition) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String)[parseObject](null) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String,java.text.ParsePosition)[parseObject](null, ParsePosition) -* void {java11-javadoc}/java.base/java/text/MessageFormat.html#setFormat(int,java.text.Format)[setFormat](int, Format) -* void {java11-javadoc}/java.base/java/text/MessageFormat.html#setFormatByArgumentIndex(int,java.text.Format)[setFormatByArgumentIndex](int, Format) -* void {java11-javadoc}/java.base/java/text/MessageFormat.html#setFormats(java.text.Format%5B%5D)[setFormats](Format[]) -* void {java11-javadoc}/java.base/java/text/MessageFormat.html#setFormatsByArgumentIndex(java.text.Format%5B%5D)[setFormatsByArgumentIndex](Format[]) -* void {java11-javadoc}/java.base/java/text/MessageFormat.html#setLocale(java.util.Locale)[setLocale](Locale) -* null {java11-javadoc}/java.base/java/text/MessageFormat.html#toPattern()[toPattern]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-MessageFormat-Field]] -==== MessageFormat.Field -* static MessageFormat.Field {java11-javadoc}/java.base/java/text/MessageFormat$Field.html#ARGUMENT[ARGUMENT] -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Normalizer]] -==== Normalizer -* static boolean {java11-javadoc}/java.base/java/text/Normalizer.html#isNormalized(java.lang.CharSequence,java.text.Normalizer$Form)[isNormalized](CharSequence, Normalizer.Form) -* static null {java11-javadoc}/java.base/java/text/Normalizer.html#normalize(java.lang.CharSequence,java.text.Normalizer$Form)[normalize](CharSequence, Normalizer.Form) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Normalizer-Form]] -==== Normalizer.Form -* static 
Normalizer.Form {java11-javadoc}/java.base/java/text/Normalizer$Form.html#NFC[NFC] -* static Normalizer.Form {java11-javadoc}/java.base/java/text/Normalizer$Form.html#NFD[NFD] -* static Normalizer.Form {java11-javadoc}/java.base/java/text/Normalizer$Form.html#NFKC[NFKC] -* static Normalizer.Form {java11-javadoc}/java.base/java/text/Normalizer$Form.html#NFKD[NFKD] -* static Normalizer.Form {java11-javadoc}/java.base/java/text/Normalizer$Form.html#valueOf(java.lang.String)[valueOf](null) -* static Normalizer.Form[] {java11-javadoc}/java.base/java/text/Normalizer$Form.html#values()[values]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-NumberFormat]] -==== NumberFormat -* static int {java11-javadoc}/java.base/java/text/NumberFormat.html#FRACTION_FIELD[FRACTION_FIELD] -* static int {java11-javadoc}/java.base/java/text/NumberFormat.html#INTEGER_FIELD[INTEGER_FIELD] -* static Locale[] {java11-javadoc}/java.base/java/text/NumberFormat.html#getAvailableLocales()[getAvailableLocales]() -* static NumberFormat {java11-javadoc}/java.base/java/text/NumberFormat.html#getCurrencyInstance()[getCurrencyInstance]() -* static NumberFormat {java11-javadoc}/java.base/java/text/NumberFormat.html#getCurrencyInstance(java.util.Locale)[getCurrencyInstance](Locale) -* static NumberFormat {java11-javadoc}/java.base/java/text/NumberFormat.html#getInstance()[getInstance]() -* static NumberFormat {java11-javadoc}/java.base/java/text/NumberFormat.html#getInstance(java.util.Locale)[getInstance](Locale) -* static NumberFormat {java11-javadoc}/java.base/java/text/NumberFormat.html#getIntegerInstance()[getIntegerInstance]() -* static NumberFormat {java11-javadoc}/java.base/java/text/NumberFormat.html#getIntegerInstance(java.util.Locale)[getIntegerInstance](Locale) -* static NumberFormat {java11-javadoc}/java.base/java/text/NumberFormat.html#getNumberInstance()[getNumberInstance]() -* static NumberFormat {java11-javadoc}/java.base/java/text/NumberFormat.html#getNumberInstance(java.util.Locale)[getNumberInstance](Locale) -* static NumberFormat {java11-javadoc}/java.base/java/text/NumberFormat.html#getPercentInstance()[getPercentInstance]() -* static NumberFormat {java11-javadoc}/java.base/java/text/NumberFormat.html#getPercentInstance(java.util.Locale)[getPercentInstance](Locale) -* def {java11-javadoc}/java.base/java/text/Format.html#clone()[clone]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object)[format](Object) -* StringBuffer {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object,java.lang.StringBuffer,java.text.FieldPosition)[format](Object, StringBuffer, FieldPosition) -* AttributedCharacterIterator {java11-javadoc}/java.base/java/text/Format.html#formatToCharacterIterator(java.lang.Object)[formatToCharacterIterator](Object) -* Currency {java11-javadoc}/java.base/java/text/NumberFormat.html#getCurrency()[getCurrency]() -* int 
{java11-javadoc}/java.base/java/text/NumberFormat.html#getMaximumFractionDigits()[getMaximumFractionDigits]() -* int {java11-javadoc}/java.base/java/text/NumberFormat.html#getMaximumIntegerDigits()[getMaximumIntegerDigits]() -* int {java11-javadoc}/java.base/java/text/NumberFormat.html#getMinimumFractionDigits()[getMinimumFractionDigits]() -* int {java11-javadoc}/java.base/java/text/NumberFormat.html#getMinimumIntegerDigits()[getMinimumIntegerDigits]() -* RoundingMode {java11-javadoc}/java.base/java/text/NumberFormat.html#getRoundingMode()[getRoundingMode]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/text/NumberFormat.html#isGroupingUsed()[isGroupingUsed]() -* boolean {java11-javadoc}/java.base/java/text/NumberFormat.html#isParseIntegerOnly()[isParseIntegerOnly]() -* Number {java11-javadoc}/java.base/java/text/NumberFormat.html#parse(java.lang.String)[parse](null) -* Number {java11-javadoc}/java.base/java/text/NumberFormat.html#parse(java.lang.String,java.text.ParsePosition)[parse](null, ParsePosition) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String)[parseObject](null) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String,java.text.ParsePosition)[parseObject](null, ParsePosition) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setCurrency(java.util.Currency)[setCurrency](Currency) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setGroupingUsed(boolean)[setGroupingUsed](boolean) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setMaximumFractionDigits(int)[setMaximumFractionDigits](int) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setMaximumIntegerDigits(int)[setMaximumIntegerDigits](int) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setMinimumFractionDigits(int)[setMinimumFractionDigits](int) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setMinimumIntegerDigits(int)[setMinimumIntegerDigits](int) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setParseIntegerOnly(boolean)[setParseIntegerOnly](boolean) -* void {java11-javadoc}/java.base/java/text/NumberFormat.html#setRoundingMode(java.math.RoundingMode)[setRoundingMode](RoundingMode) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-NumberFormat-Field]] -==== NumberFormat.Field -* static NumberFormat.Field {java11-javadoc}/java.base/java/text/NumberFormat$Field.html#CURRENCY[CURRENCY] -* static NumberFormat.Field {java11-javadoc}/java.base/java/text/NumberFormat$Field.html#DECIMAL_SEPARATOR[DECIMAL_SEPARATOR] -* static NumberFormat.Field {java11-javadoc}/java.base/java/text/NumberFormat$Field.html#EXPONENT[EXPONENT] -* static NumberFormat.Field {java11-javadoc}/java.base/java/text/NumberFormat$Field.html#EXPONENT_SIGN[EXPONENT_SIGN] -* static NumberFormat.Field {java11-javadoc}/java.base/java/text/NumberFormat$Field.html#EXPONENT_SYMBOL[EXPONENT_SYMBOL] -* static NumberFormat.Field {java11-javadoc}/java.base/java/text/NumberFormat$Field.html#FRACTION[FRACTION] -* static NumberFormat.Field {java11-javadoc}/java.base/java/text/NumberFormat$Field.html#GROUPING_SEPARATOR[GROUPING_SEPARATOR] -* static NumberFormat.Field {java11-javadoc}/java.base/java/text/NumberFormat$Field.html#INTEGER[INTEGER] -* static NumberFormat.Field {java11-javadoc}/java.base/java/text/NumberFormat$Field.html#PERCENT[PERCENT] -* static 
NumberFormat.Field {java11-javadoc}/java.base/java/text/NumberFormat$Field.html#PERMILLE[PERMILLE] -* static NumberFormat.Field {java11-javadoc}/java.base/java/text/NumberFormat$Field.html#SIGN[SIGN] -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ParseException]] -==== ParseException -* {java11-javadoc}/java.base/java/text/ParseException.html#(java.lang.String,int)[ParseException](null, int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/text/ParseException.html#getErrorOffset()[getErrorOffset]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ParsePosition]] -==== ParsePosition -* {java11-javadoc}/java.base/java/text/ParsePosition.html#(int)[ParsePosition](int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/text/ParsePosition.html#getErrorIndex()[getErrorIndex]() -* int {java11-javadoc}/java.base/java/text/ParsePosition.html#getIndex()[getIndex]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* void {java11-javadoc}/java.base/java/text/ParsePosition.html#setErrorIndex(int)[setErrorIndex](int) -* void {java11-javadoc}/java.base/java/text/ParsePosition.html#setIndex(int)[setIndex](int) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-RuleBasedCollator]] -==== RuleBasedCollator -* {java11-javadoc}/java.base/java/text/RuleBasedCollator.html#(java.lang.String)[RuleBasedCollator](null) -* def {java11-javadoc}/java.base/java/text/Collator.html#clone()[clone]() -* int {java11-javadoc}/java.base/java/util/Comparator.html#compare(java.lang.Object,java.lang.Object)[compare](def, def) -* boolean {java11-javadoc}/java.base/java/util/Comparator.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/text/Collator.html#equals(java.lang.String,java.lang.String)[equals](null, null) -* CollationElementIterator {java11-javadoc}/java.base/java/text/RuleBasedCollator.html#getCollationElementIterator(java.lang.String)[getCollationElementIterator](null) -* CollationKey {java11-javadoc}/java.base/java/text/Collator.html#getCollationKey(java.lang.String)[getCollationKey](null) -* int {java11-javadoc}/java.base/java/text/Collator.html#getDecomposition()[getDecomposition]() -* null {java11-javadoc}/java.base/java/text/RuleBasedCollator.html#getRules()[getRules]() -* int {java11-javadoc}/java.base/java/text/Collator.html#getStrength()[getStrength]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#reversed()[reversed]() -* void {java11-javadoc}/java.base/java/text/Collator.html#setDecomposition(int)[setDecomposition](int) 
-* void {java11-javadoc}/java.base/java/text/Collator.html#setStrength(int)[setStrength](int) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparing(java.util.Comparator)[thenComparing](Comparator) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparing(java.util.function.Function,java.util.Comparator)[thenComparing](Function, Comparator) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparingDouble(java.util.function.ToDoubleFunction)[thenComparingDouble](ToDoubleFunction) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparingInt(java.util.function.ToIntFunction)[thenComparingInt](ToIntFunction) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparingLong(java.util.function.ToLongFunction)[thenComparingLong](ToLongFunction) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-SimpleDateFormat]] -==== SimpleDateFormat -* {java11-javadoc}/java.base/java/text/SimpleDateFormat.html#()[SimpleDateFormat]() -* {java11-javadoc}/java.base/java/text/SimpleDateFormat.html#(java.lang.String)[SimpleDateFormat](null) -* {java11-javadoc}/java.base/java/text/SimpleDateFormat.html#(java.lang.String,java.util.Locale)[SimpleDateFormat](null, Locale) -* void {java11-javadoc}/java.base/java/text/SimpleDateFormat.html#applyLocalizedPattern(java.lang.String)[applyLocalizedPattern](null) -* void {java11-javadoc}/java.base/java/text/SimpleDateFormat.html#applyPattern(java.lang.String)[applyPattern](null) -* def {java11-javadoc}/java.base/java/text/Format.html#clone()[clone]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object)[format](Object) -* StringBuffer {java11-javadoc}/java.base/java/text/Format.html#format(java.lang.Object,java.lang.StringBuffer,java.text.FieldPosition)[format](Object, StringBuffer, FieldPosition) -* AttributedCharacterIterator {java11-javadoc}/java.base/java/text/Format.html#formatToCharacterIterator(java.lang.Object)[formatToCharacterIterator](Object) -* Date {java11-javadoc}/java.base/java/text/SimpleDateFormat.html#get2DigitYearStart()[get2DigitYearStart]() -* Calendar {java11-javadoc}/java.base/java/text/DateFormat.html#getCalendar()[getCalendar]() -* DateFormatSymbols {java11-javadoc}/java.base/java/text/SimpleDateFormat.html#getDateFormatSymbols()[getDateFormatSymbols]() -* NumberFormat {java11-javadoc}/java.base/java/text/DateFormat.html#getNumberFormat()[getNumberFormat]() -* TimeZone {java11-javadoc}/java.base/java/text/DateFormat.html#getTimeZone()[getTimeZone]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/text/DateFormat.html#isLenient()[isLenient]() -* Date {java11-javadoc}/java.base/java/text/DateFormat.html#parse(java.lang.String)[parse](null) -* Date {java11-javadoc}/java.base/java/text/DateFormat.html#parse(java.lang.String,java.text.ParsePosition)[parse](null, ParsePosition) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String)[parseObject](null) -* Object {java11-javadoc}/java.base/java/text/Format.html#parseObject(java.lang.String,java.text.ParsePosition)[parseObject](null, ParsePosition) -* void {java11-javadoc}/java.base/java/text/SimpleDateFormat.html#set2DigitYearStart(java.util.Date)[set2DigitYearStart](Date) -* void 
{java11-javadoc}/java.base/java/text/DateFormat.html#setCalendar(java.util.Calendar)[setCalendar](Calendar) -* void {java11-javadoc}/java.base/java/text/SimpleDateFormat.html#setDateFormatSymbols(java.text.DateFormatSymbols)[setDateFormatSymbols](DateFormatSymbols) -* void {java11-javadoc}/java.base/java/text/DateFormat.html#setLenient(boolean)[setLenient](boolean) -* void {java11-javadoc}/java.base/java/text/DateFormat.html#setNumberFormat(java.text.NumberFormat)[setNumberFormat](NumberFormat) -* void {java11-javadoc}/java.base/java/text/DateFormat.html#setTimeZone(java.util.TimeZone)[setTimeZone](TimeZone) -* null {java11-javadoc}/java.base/java/text/SimpleDateFormat.html#toLocalizedPattern()[toLocalizedPattern]() -* null {java11-javadoc}/java.base/java/text/SimpleDateFormat.html#toPattern()[toPattern]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-StringCharacterIterator]] -==== StringCharacterIterator -* {java11-javadoc}/java.base/java/text/StringCharacterIterator.html#(java.lang.String)[StringCharacterIterator](null) -* {java11-javadoc}/java.base/java/text/StringCharacterIterator.html#(java.lang.String,int)[StringCharacterIterator](null, int) -* {java11-javadoc}/java.base/java/text/StringCharacterIterator.html#(java.lang.String,int,int,int)[StringCharacterIterator](null, int, int, int) -* def {java11-javadoc}/java.base/java/text/CharacterIterator.html#clone()[clone]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#current()[current]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#first()[first]() -* int {java11-javadoc}/java.base/java/text/CharacterIterator.html#getBeginIndex()[getBeginIndex]() -* int {java11-javadoc}/java.base/java/text/CharacterIterator.html#getEndIndex()[getEndIndex]() -* int {java11-javadoc}/java.base/java/text/CharacterIterator.html#getIndex()[getIndex]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#last()[last]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#next()[next]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#previous()[previous]() -* char {java11-javadoc}/java.base/java/text/CharacterIterator.html#setIndex(int)[setIndex](int) -* void {java11-javadoc}/java.base/java/text/StringCharacterIterator.html#setText(java.lang.String)[setText](null) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-java-time"] -=== Shared API for package java.time -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-shared-Clock]] -==== Clock -* static Clock {java11-javadoc}/java.base/java/time/Clock.html#fixed(java.time.Instant,java.time.ZoneId)[fixed](Instant, ZoneId) -* static Clock {java11-javadoc}/java.base/java/time/Clock.html#offset(java.time.Clock,java.time.Duration)[offset](Clock, Duration) -* static Clock {java11-javadoc}/java.base/java/time/Clock.html#tick(java.time.Clock,java.time.Duration)[tick](Clock, Duration) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* ZoneId {java11-javadoc}/java.base/java/time/Clock.html#getZone()[getZone]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Instant {java11-javadoc}/java.base/java/time/Clock.html#instant()[instant]() -* long {java11-javadoc}/java.base/java/time/Clock.html#millis()[millis]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DateTimeException]] -==== DateTimeException -* {java11-javadoc}/java.base/java/time/DateTimeException.html#(java.lang.String)[DateTimeException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DayOfWeek]] -==== DayOfWeek -* static DayOfWeek {java11-javadoc}/java.base/java/time/DayOfWeek.html#FRIDAY[FRIDAY] -* static DayOfWeek {java11-javadoc}/java.base/java/time/DayOfWeek.html#MONDAY[MONDAY] -* static DayOfWeek {java11-javadoc}/java.base/java/time/DayOfWeek.html#SATURDAY[SATURDAY] -* static DayOfWeek {java11-javadoc}/java.base/java/time/DayOfWeek.html#SUNDAY[SUNDAY] -* static DayOfWeek {java11-javadoc}/java.base/java/time/DayOfWeek.html#THURSDAY[THURSDAY] -* static DayOfWeek {java11-javadoc}/java.base/java/time/DayOfWeek.html#TUESDAY[TUESDAY] -* static DayOfWeek {java11-javadoc}/java.base/java/time/DayOfWeek.html#WEDNESDAY[WEDNESDAY] -* static DayOfWeek {java11-javadoc}/java.base/java/time/DayOfWeek.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static DayOfWeek {java11-javadoc}/java.base/java/time/DayOfWeek.html#of(int)[of](int) -* static DayOfWeek {java11-javadoc}/java.base/java/time/DayOfWeek.html#valueOf(java.lang.String)[valueOf](null) -* static DayOfWeek[] {java11-javadoc}/java.base/java/time/DayOfWeek.html#values()[values]() -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* null {java11-javadoc}/java.base/java/time/DayOfWeek.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* long 
{java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/DayOfWeek.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* DayOfWeek {java11-javadoc}/java.base/java/time/DayOfWeek.html#minus(long)[minus](long) -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* DayOfWeek {java11-javadoc}/java.base/java/time/DayOfWeek.html#plus(long)[plus](long) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Duration]] -==== Duration -* static Duration {java11-javadoc}/java.base/java/time/Duration.html#ZERO[ZERO] -* static Duration {java11-javadoc}/java.base/java/time/Duration.html#between(java.time.temporal.Temporal,java.time.temporal.Temporal)[between](Temporal, Temporal) -* static Duration {java11-javadoc}/java.base/java/time/Duration.html#from(java.time.temporal.TemporalAmount)[from](TemporalAmount) -* static Duration {java11-javadoc}/java.base/java/time/Duration.html#of(long,java.time.temporal.TemporalUnit)[of](long, TemporalUnit) -* static Duration {java11-javadoc}/java.base/java/time/Duration.html#ofDays(long)[ofDays](long) -* static Duration {java11-javadoc}/java.base/java/time/Duration.html#ofHours(long)[ofHours](long) -* static Duration {java11-javadoc}/java.base/java/time/Duration.html#ofMillis(long)[ofMillis](long) -* static Duration {java11-javadoc}/java.base/java/time/Duration.html#ofMinutes(long)[ofMinutes](long) -* static Duration {java11-javadoc}/java.base/java/time/Duration.html#ofNanos(long)[ofNanos](long) -* static Duration {java11-javadoc}/java.base/java/time/Duration.html#ofSeconds(long)[ofSeconds](long) -* static Duration {java11-javadoc}/java.base/java/time/Duration.html#ofSeconds(long,long)[ofSeconds](long, long) -* static Duration {java11-javadoc}/java.base/java/time/Duration.html#parse(java.lang.CharSequence)[parse](CharSequence) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#abs()[abs]() -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#addTo(java.time.temporal.Temporal)[addTo](Temporal) -* int {java11-javadoc}/java.base/java/time/Duration.html#compareTo(java.time.Duration)[compareTo](Duration) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#dividedBy(long)[dividedBy](long) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#get(java.time.temporal.TemporalUnit)[get](TemporalUnit) -* int {java11-javadoc}/java.base/java/time/Duration.html#getNano()[getNano]() -* long {java11-javadoc}/java.base/java/time/Duration.html#getSeconds()[getSeconds]() -* List {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#getUnits()[getUnits]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean 
{java11-javadoc}/java.base/java/time/Duration.html#isNegative()[isNegative]() -* boolean {java11-javadoc}/java.base/java/time/Duration.html#isZero()[isZero]() -* Duration {java11-javadoc}/java.base/java/time/Duration.html#minus(java.time.Duration)[minus](Duration) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#minusDays(long)[minusDays](long) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#minusHours(long)[minusHours](long) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#minusMillis(long)[minusMillis](long) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#minusMinutes(long)[minusMinutes](long) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#minusNanos(long)[minusNanos](long) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#minusSeconds(long)[minusSeconds](long) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#multipliedBy(long)[multipliedBy](long) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#negated()[negated]() -* Duration {java11-javadoc}/java.base/java/time/Duration.html#plus(java.time.Duration)[plus](Duration) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#plusDays(long)[plusDays](long) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#plusHours(long)[plusHours](long) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#plusMillis(long)[plusMillis](long) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#plusMinutes(long)[plusMinutes](long) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#plusNanos(long)[plusNanos](long) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#plusSeconds(long)[plusSeconds](long) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#subtractFrom(java.time.temporal.Temporal)[subtractFrom](Temporal) -* long {java11-javadoc}/java.base/java/time/Duration.html#toDays()[toDays]() -* long {java11-javadoc}/java.base/java/time/Duration.html#toHours()[toHours]() -* long {java11-javadoc}/java.base/java/time/Duration.html#toMillis()[toMillis]() -* long {java11-javadoc}/java.base/java/time/Duration.html#toMinutes()[toMinutes]() -* long {java11-javadoc}/java.base/java/time/Duration.html#toNanos()[toNanos]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Duration {java11-javadoc}/java.base/java/time/Duration.html#withNanos(int)[withNanos](int) -* Duration {java11-javadoc}/java.base/java/time/Duration.html#withSeconds(long)[withSeconds](long) - - -[[painless-api-reference-shared-Instant]] -==== Instant -* static Instant {java11-javadoc}/java.base/java/time/Instant.html#EPOCH[EPOCH] -* static Instant {java11-javadoc}/java.base/java/time/Instant.html#MAX[MAX] -* static Instant {java11-javadoc}/java.base/java/time/Instant.html#MIN[MIN] -* static Instant {java11-javadoc}/java.base/java/time/Instant.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static Instant {java11-javadoc}/java.base/java/time/Instant.html#ofEpochMilli(long)[ofEpochMilli](long) -* static Instant {java11-javadoc}/java.base/java/time/Instant.html#ofEpochSecond(long)[ofEpochSecond](long) -* static Instant 
{java11-javadoc}/java.base/java/time/Instant.html#ofEpochSecond(long,long)[ofEpochSecond](long, long) -* static Instant {java11-javadoc}/java.base/java/time/Instant.html#parse(java.lang.CharSequence)[parse](CharSequence) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* OffsetDateTime {java11-javadoc}/java.base/java/time/Instant.html#atOffset(java.time.ZoneOffset)[atOffset](ZoneOffset) -* ZonedDateTime {java11-javadoc}/java.base/java/time/Instant.html#atZone(java.time.ZoneId)[atZone](ZoneId) -* int {java11-javadoc}/java.base/java/time/Instant.html#compareTo(java.time.Instant)[compareTo](Instant) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* long {java11-javadoc}/java.base/java/time/Instant.html#getEpochSecond()[getEpochSecond]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/Instant.html#getNano()[getNano]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/Instant.html#isAfter(java.time.Instant)[isAfter](Instant) -* boolean {java11-javadoc}/java.base/java/time/Instant.html#isBefore(java.time.Instant)[isBefore](Instant) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* Instant {java11-javadoc}/java.base/java/time/Instant.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* Instant {java11-javadoc}/java.base/java/time/Instant.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* Instant {java11-javadoc}/java.base/java/time/Instant.html#minusMillis(long)[minusMillis](long) -* Instant {java11-javadoc}/java.base/java/time/Instant.html#minusNanos(long)[minusNanos](long) -* Instant {java11-javadoc}/java.base/java/time/Instant.html#minusSeconds(long)[minusSeconds](long) -* Instant {java11-javadoc}/java.base/java/time/Instant.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* Instant {java11-javadoc}/java.base/java/time/Instant.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* Instant {java11-javadoc}/java.base/java/time/Instant.html#plusMillis(long)[plusMillis](long) -* Instant {java11-javadoc}/java.base/java/time/Instant.html#plusNanos(long)[plusNanos](long) -* Instant {java11-javadoc}/java.base/java/time/Instant.html#plusSeconds(long)[plusSeconds](long) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/Instant.html#toEpochMilli()[toEpochMilli]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Instant {java11-javadoc}/java.base/java/time/Instant.html#truncatedTo(java.time.temporal.TemporalUnit)[truncatedTo](TemporalUnit) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* Instant 
{java11-javadoc}/java.base/java/time/Instant.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* Instant {java11-javadoc}/java.base/java/time/Instant.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) - - -[[painless-api-reference-shared-LocalDate]] -==== LocalDate -* static LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#MAX[MAX] -* static LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#MIN[MIN] -* static LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#of(int,int,int)[of](int, int, int) -* static LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#ofEpochDay(long)[ofEpochDay](long) -* static LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#ofYearDay(int,int)[ofYearDay](int, int) -* static LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#parse(java.lang.CharSequence)[parse](CharSequence) -* static LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#parse(java.lang.CharSequence,java.time.format.DateTimeFormatter)[parse](CharSequence, DateTimeFormatter) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDate.html#atStartOfDay()[atStartOfDay]() -* ZonedDateTime {java11-javadoc}/java.base/java/time/LocalDate.html#atStartOfDay(java.time.ZoneId)[atStartOfDay](ZoneId) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDate.html#atTime(java.time.LocalTime)[atTime](LocalTime) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDate.html#atTime(int,int)[atTime](int, int) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDate.html#atTime(int,int,int)[atTime](int, int, int) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDate.html#atTime(int,int,int,int)[atTime](int, int, int, int) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#compareTo(java.time.chrono.ChronoLocalDate)[compareTo](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* IsoChronology {java11-javadoc}/java.base/java/time/LocalDate.html#getChronology()[getChronology]() -* int {java11-javadoc}/java.base/java/time/LocalDate.html#getDayOfMonth()[getDayOfMonth]() -* DayOfWeek {java11-javadoc}/java.base/java/time/LocalDate.html#getDayOfWeek()[getDayOfWeek]() -* int {java11-javadoc}/java.base/java/time/LocalDate.html#getDayOfYear()[getDayOfYear]() -* Era {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#getEra()[getEra]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* Month {java11-javadoc}/java.base/java/time/LocalDate.html#getMonth()[getMonth]() -* int {java11-javadoc}/java.base/java/time/LocalDate.html#getMonthValue()[getMonthValue]() -* int {java11-javadoc}/java.base/java/time/LocalDate.html#getYear()[getYear]() -* int 
{java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isAfter(java.time.chrono.ChronoLocalDate)[isAfter](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isBefore(java.time.chrono.ChronoLocalDate)[isBefore](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isEqual(java.time.chrono.ChronoLocalDate)[isEqual](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isLeapYear()[isLeapYear]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#lengthOfMonth()[lengthOfMonth]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#lengthOfYear()[lengthOfYear]() -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#minusDays(long)[minusDays](long) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#minusMonths(long)[minusMonths](long) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#minusWeeks(long)[minusWeeks](long) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#minusYears(long)[minusYears](long) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#plusDays(long)[plusDays](long) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#plusMonths(long)[plusMonths](long) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#plusWeeks(long)[plusWeeks](long) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#plusYears(long)[plusYears](long) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#toEpochDay()[toEpochDay]() -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#toString()[toString]() -* Period {java11-javadoc}/java.base/java/time/LocalDate.html#until(java.time.chrono.ChronoLocalDate)[until](ChronoLocalDate) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#withDayOfMonth(int)[withDayOfMonth](int) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#withDayOfYear(int)[withDayOfYear](int) -* 
LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#withMonth(int)[withMonth](int) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDate.html#withYear(int)[withYear](int) - - -[[painless-api-reference-shared-LocalDateTime]] -==== LocalDateTime -* static LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#MAX[MAX] -* static LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#MIN[MIN] -* static LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#of(java.time.LocalDate,java.time.LocalTime)[of](LocalDate, LocalTime) -* static LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#of(int,int,int,int,int)[of](int, int, int, int, int) -* static LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#of(int,int,int,int,int,int)[of](int, int, int, int, int, int) -* static LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#of(int,int,int,int,int,int,int)[of](int, int, int, int, int, int, int) -* static LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#ofEpochSecond(long,int,java.time.ZoneOffset)[ofEpochSecond](long, int, ZoneOffset) -* static LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#ofInstant(java.time.Instant,java.time.ZoneId)[ofInstant](Instant, ZoneId) -* static LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#parse(java.lang.CharSequence)[parse](CharSequence) -* static LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#parse(java.lang.CharSequence,java.time.format.DateTimeFormatter)[parse](CharSequence, DateTimeFormatter) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* OffsetDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#atOffset(java.time.ZoneOffset)[atOffset](ZoneOffset) -* ZonedDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#atZone(java.time.ZoneId)[atZone](ZoneId) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#compareTo(java.time.chrono.ChronoLocalDateTime)[compareTo](ChronoLocalDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* Chronology {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#getChronology()[getChronology]() -* int {java11-javadoc}/java.base/java/time/LocalDateTime.html#getDayOfMonth()[getDayOfMonth]() -* DayOfWeek {java11-javadoc}/java.base/java/time/LocalDateTime.html#getDayOfWeek()[getDayOfWeek]() -* int {java11-javadoc}/java.base/java/time/LocalDateTime.html#getDayOfYear()[getDayOfYear]() -* int {java11-javadoc}/java.base/java/time/LocalDateTime.html#getHour()[getHour]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/LocalDateTime.html#getMinute()[getMinute]() -* Month {java11-javadoc}/java.base/java/time/LocalDateTime.html#getMonth()[getMonth]() -* int 
{java11-javadoc}/java.base/java/time/LocalDateTime.html#getMonthValue()[getMonthValue]() -* int {java11-javadoc}/java.base/java/time/LocalDateTime.html#getNano()[getNano]() -* int {java11-javadoc}/java.base/java/time/LocalDateTime.html#getSecond()[getSecond]() -* int {java11-javadoc}/java.base/java/time/LocalDateTime.html#getYear()[getYear]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#isAfter(java.time.chrono.ChronoLocalDateTime)[isAfter](ChronoLocalDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#isBefore(java.time.chrono.ChronoLocalDateTime)[isBefore](ChronoLocalDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#isEqual(java.time.chrono.ChronoLocalDateTime)[isEqual](ChronoLocalDateTime) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#minusDays(long)[minusDays](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#minusHours(long)[minusHours](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#minusMinutes(long)[minusMinutes](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#minusMonths(long)[minusMonths](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#minusNanos(long)[minusNanos](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#minusSeconds(long)[minusSeconds](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#minusWeeks(long)[minusWeeks](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#minusYears(long)[minusYears](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#plusDays(long)[plusDays](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#plusHours(long)[plusHours](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#plusMinutes(long)[plusMinutes](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#plusMonths(long)[plusMonths](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#plusNanos(long)[plusNanos](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#plusSeconds(long)[plusSeconds](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#plusWeeks(long)[plusWeeks](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#plusYears(long)[plusYears](long) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange 
{java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#toEpochSecond(java.time.ZoneOffset)[toEpochSecond](ZoneOffset) -* Instant {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#toInstant(java.time.ZoneOffset)[toInstant](ZoneOffset) -* LocalDate {java11-javadoc}/java.base/java/time/LocalDateTime.html#toLocalDate()[toLocalDate]() -* LocalTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#toLocalTime()[toLocalTime]() -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#toString()[toString]() -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#truncatedTo(java.time.temporal.TemporalUnit)[truncatedTo](TemporalUnit) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#withDayOfMonth(int)[withDayOfMonth](int) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#withDayOfYear(int)[withDayOfYear](int) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#withHour(int)[withHour](int) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#withMinute(int)[withMinute](int) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#withMonth(int)[withMonth](int) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#withSecond(int)[withSecond](int) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalDateTime.html#withYear(int)[withYear](int) - - -[[painless-api-reference-shared-LocalTime]] -==== LocalTime -* static LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#MAX[MAX] -* static LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#MIDNIGHT[MIDNIGHT] -* static LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#MIN[MIN] -* static LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#NOON[NOON] -* static LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#of(int,int)[of](int, int) -* static LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#of(int,int,int)[of](int, int, int) -* static LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#of(int,int,int,int)[of](int, int, int, int) -* static LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#ofNanoOfDay(long)[ofNanoOfDay](long) -* static LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#ofSecondOfDay(long)[ofSecondOfDay](long) -* static LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#parse(java.lang.CharSequence)[parse](CharSequence) -* static LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#parse(java.lang.CharSequence,java.time.format.DateTimeFormatter)[parse](CharSequence, DateTimeFormatter) -* Temporal 
{java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* LocalDateTime {java11-javadoc}/java.base/java/time/LocalTime.html#atDate(java.time.LocalDate)[atDate](LocalDate) -* OffsetTime {java11-javadoc}/java.base/java/time/LocalTime.html#atOffset(java.time.ZoneOffset)[atOffset](ZoneOffset) -* int {java11-javadoc}/java.base/java/time/LocalTime.html#compareTo(java.time.LocalTime)[compareTo](LocalTime) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/LocalTime.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* int {java11-javadoc}/java.base/java/time/LocalTime.html#getHour()[getHour]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/LocalTime.html#getMinute()[getMinute]() -* int {java11-javadoc}/java.base/java/time/LocalTime.html#getNano()[getNano]() -* int {java11-javadoc}/java.base/java/time/LocalTime.html#getSecond()[getSecond]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/LocalTime.html#isAfter(java.time.LocalTime)[isAfter](LocalTime) -* boolean {java11-javadoc}/java.base/java/time/LocalTime.html#isBefore(java.time.LocalTime)[isBefore](LocalTime) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#minusHours(long)[minusHours](long) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#minusMinutes(long)[minusMinutes](long) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#minusNanos(long)[minusNanos](long) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#minusSeconds(long)[minusSeconds](long) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#plusHours(long)[plusHours](long) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#plusMinutes(long)[plusMinutes](long) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#plusNanos(long)[plusNanos](long) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#plusSeconds(long)[plusSeconds](long) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/LocalTime.html#toNanoOfDay()[toNanoOfDay]() -* int 
{java11-javadoc}/java.base/java/time/LocalTime.html#toSecondOfDay()[toSecondOfDay]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#truncatedTo(java.time.temporal.TemporalUnit)[truncatedTo](TemporalUnit) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#withHour(int)[withHour](int) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#withMinute(int)[withMinute](int) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#withNano(int)[withNano](int) -* LocalTime {java11-javadoc}/java.base/java/time/LocalTime.html#withSecond(int)[withSecond](int) - - -[[painless-api-reference-shared-Month]] -==== Month -* static Month {java11-javadoc}/java.base/java/time/Month.html#APRIL[APRIL] -* static Month {java11-javadoc}/java.base/java/time/Month.html#AUGUST[AUGUST] -* static Month {java11-javadoc}/java.base/java/time/Month.html#DECEMBER[DECEMBER] -* static Month {java11-javadoc}/java.base/java/time/Month.html#FEBRUARY[FEBRUARY] -* static Month {java11-javadoc}/java.base/java/time/Month.html#JANUARY[JANUARY] -* static Month {java11-javadoc}/java.base/java/time/Month.html#JULY[JULY] -* static Month {java11-javadoc}/java.base/java/time/Month.html#JUNE[JUNE] -* static Month {java11-javadoc}/java.base/java/time/Month.html#MARCH[MARCH] -* static Month {java11-javadoc}/java.base/java/time/Month.html#MAY[MAY] -* static Month {java11-javadoc}/java.base/java/time/Month.html#NOVEMBER[NOVEMBER] -* static Month {java11-javadoc}/java.base/java/time/Month.html#OCTOBER[OCTOBER] -* static Month {java11-javadoc}/java.base/java/time/Month.html#SEPTEMBER[SEPTEMBER] -* static Month {java11-javadoc}/java.base/java/time/Month.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static Month {java11-javadoc}/java.base/java/time/Month.html#of(int)[of](int) -* static Month {java11-javadoc}/java.base/java/time/Month.html#valueOf(java.lang.String)[valueOf](null) -* static Month[] {java11-javadoc}/java.base/java/time/Month.html#values()[values]() -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/Month.html#firstDayOfYear(boolean)[firstDayOfYear](boolean) -* Month {java11-javadoc}/java.base/java/time/Month.html#firstMonthOfQuarter()[firstMonthOfQuarter]() -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* null {java11-javadoc}/java.base/java/time/Month.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int 
{java11-javadoc}/java.base/java/time/Month.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* int {java11-javadoc}/java.base/java/time/Month.html#length(boolean)[length](boolean) -* int {java11-javadoc}/java.base/java/time/Month.html#maxLength()[maxLength]() -* int {java11-javadoc}/java.base/java/time/Month.html#minLength()[minLength]() -* Month {java11-javadoc}/java.base/java/time/Month.html#minus(long)[minus](long) -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* Month {java11-javadoc}/java.base/java/time/Month.html#plus(long)[plus](long) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-MonthDay]] -==== MonthDay -* static MonthDay {java11-javadoc}/java.base/java/time/MonthDay.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static MonthDay {java11-javadoc}/java.base/java/time/MonthDay.html#of(int,int)[of](int, int) -* static MonthDay {java11-javadoc}/java.base/java/time/MonthDay.html#parse(java.lang.CharSequence)[parse](CharSequence) -* static MonthDay {java11-javadoc}/java.base/java/time/MonthDay.html#parse(java.lang.CharSequence,java.time.format.DateTimeFormatter)[parse](CharSequence, DateTimeFormatter) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* LocalDate {java11-javadoc}/java.base/java/time/MonthDay.html#atYear(int)[atYear](int) -* int {java11-javadoc}/java.base/java/time/MonthDay.html#compareTo(java.time.MonthDay)[compareTo](MonthDay) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/MonthDay.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* int {java11-javadoc}/java.base/java/time/MonthDay.html#getDayOfMonth()[getDayOfMonth]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* Month {java11-javadoc}/java.base/java/time/MonthDay.html#getMonth()[getMonth]() -* int {java11-javadoc}/java.base/java/time/MonthDay.html#getMonthValue()[getMonthValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/MonthDay.html#isAfter(java.time.MonthDay)[isAfter](MonthDay) -* boolean {java11-javadoc}/java.base/java/time/MonthDay.html#isBefore(java.time.MonthDay)[isBefore](MonthDay) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* boolean {java11-javadoc}/java.base/java/time/MonthDay.html#isValidYear(int)[isValidYear](int) -* def 
{java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* MonthDay {java11-javadoc}/java.base/java/time/MonthDay.html#with(java.time.Month)[with](Month) -* MonthDay {java11-javadoc}/java.base/java/time/MonthDay.html#withDayOfMonth(int)[withDayOfMonth](int) -* MonthDay {java11-javadoc}/java.base/java/time/MonthDay.html#withMonth(int)[withMonth](int) - - -[[painless-api-reference-shared-OffsetDateTime]] -==== OffsetDateTime -* static OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#MAX[MAX] -* static OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#MIN[MIN] -* static OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#of(java.time.LocalDateTime,java.time.ZoneOffset)[of](LocalDateTime, ZoneOffset) -* static OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#of(java.time.LocalDate,java.time.LocalTime,java.time.ZoneOffset)[of](LocalDate, LocalTime, ZoneOffset) -* static OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#of(int,int,int,int,int,int,int,java.time.ZoneOffset)[of](int, int, int, int, int, int, int, ZoneOffset) -* static OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#ofInstant(java.time.Instant,java.time.ZoneId)[ofInstant](Instant, ZoneId) -* static OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#parse(java.lang.CharSequence)[parse](CharSequence) -* static OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#parse(java.lang.CharSequence,java.time.format.DateTimeFormatter)[parse](CharSequence, DateTimeFormatter) -* static Comparator {java11-javadoc}/java.base/java/time/OffsetDateTime.html#timeLineOrder()[timeLineOrder]() -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* ZonedDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#atZoneSameInstant(java.time.ZoneId)[atZoneSameInstant](ZoneId) -* ZonedDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#atZoneSimilarLocal(java.time.ZoneId)[atZoneSimilarLocal](ZoneId) -* int {java11-javadoc}/java.base/java/time/OffsetDateTime.html#compareTo(java.time.OffsetDateTime)[compareTo](OffsetDateTime) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/OffsetDateTime.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* int {java11-javadoc}/java.base/java/time/OffsetDateTime.html#getDayOfMonth()[getDayOfMonth]() -* DayOfWeek {java11-javadoc}/java.base/java/time/OffsetDateTime.html#getDayOfWeek()[getDayOfWeek]() -* int {java11-javadoc}/java.base/java/time/OffsetDateTime.html#getDayOfYear()[getDayOfYear]() -* int {java11-javadoc}/java.base/java/time/OffsetDateTime.html#getHour()[getHour]() -* long 
{java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/OffsetDateTime.html#getMinute()[getMinute]() -* Month {java11-javadoc}/java.base/java/time/OffsetDateTime.html#getMonth()[getMonth]() -* int {java11-javadoc}/java.base/java/time/OffsetDateTime.html#getMonthValue()[getMonthValue]() -* int {java11-javadoc}/java.base/java/time/OffsetDateTime.html#getNano()[getNano]() -* ZoneOffset {java11-javadoc}/java.base/java/time/OffsetDateTime.html#getOffset()[getOffset]() -* int {java11-javadoc}/java.base/java/time/OffsetDateTime.html#getSecond()[getSecond]() -* int {java11-javadoc}/java.base/java/time/OffsetDateTime.html#getYear()[getYear]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/OffsetDateTime.html#isAfter(java.time.OffsetDateTime)[isAfter](OffsetDateTime) -* boolean {java11-javadoc}/java.base/java/time/OffsetDateTime.html#isBefore(java.time.OffsetDateTime)[isBefore](OffsetDateTime) -* boolean {java11-javadoc}/java.base/java/time/OffsetDateTime.html#isEqual(java.time.OffsetDateTime)[isEqual](OffsetDateTime) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#minusDays(long)[minusDays](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#minusHours(long)[minusHours](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#minusMinutes(long)[minusMinutes](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#minusMonths(long)[minusMonths](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#minusNanos(long)[minusNanos](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#minusSeconds(long)[minusSeconds](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#minusWeeks(long)[minusWeeks](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#minusYears(long)[minusYears](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#plusDays(long)[plusDays](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#plusHours(long)[plusHours](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#plusMinutes(long)[plusMinutes](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#plusMonths(long)[plusMonths](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#plusNanos(long)[plusNanos](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#plusSeconds(long)[plusSeconds](long) -* OffsetDateTime 
{java11-javadoc}/java.base/java/time/OffsetDateTime.html#plusWeeks(long)[plusWeeks](long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#plusYears(long)[plusYears](long) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/OffsetDateTime.html#toEpochSecond()[toEpochSecond]() -* Instant {java11-javadoc}/java.base/java/time/OffsetDateTime.html#toInstant()[toInstant]() -* LocalDate {java11-javadoc}/java.base/java/time/OffsetDateTime.html#toLocalDate()[toLocalDate]() -* LocalDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#toLocalDateTime()[toLocalDateTime]() -* LocalTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#toLocalTime()[toLocalTime]() -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#toOffsetTime()[toOffsetTime]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* ZonedDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#toZonedDateTime()[toZonedDateTime]() -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#truncatedTo(java.time.temporal.TemporalUnit)[truncatedTo](TemporalUnit) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#withDayOfMonth(int)[withDayOfMonth](int) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#withDayOfYear(int)[withDayOfYear](int) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#withHour(int)[withHour](int) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#withMinute(int)[withMinute](int) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#withMonth(int)[withMonth](int) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#withNano(int)[withNano](int) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#withOffsetSameInstant(java.time.ZoneOffset)[withOffsetSameInstant](ZoneOffset) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#withOffsetSameLocal(java.time.ZoneOffset)[withOffsetSameLocal](ZoneOffset) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#withSecond(int)[withSecond](int) -* OffsetDateTime {java11-javadoc}/java.base/java/time/OffsetDateTime.html#withYear(int)[withYear](int) - - -[[painless-api-reference-shared-OffsetTime]] -==== OffsetTime -* static OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#MAX[MAX] -* static OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#MIN[MIN] -* static OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#of(java.time.LocalTime,java.time.ZoneOffset)[of](LocalTime, 
ZoneOffset) -* static OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#of(int,int,int,int,java.time.ZoneOffset)[of](int, int, int, int, ZoneOffset) -* static OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#ofInstant(java.time.Instant,java.time.ZoneId)[ofInstant](Instant, ZoneId) -* static OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#parse(java.lang.CharSequence)[parse](CharSequence) -* static OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#parse(java.lang.CharSequence,java.time.format.DateTimeFormatter)[parse](CharSequence, DateTimeFormatter) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* int {java11-javadoc}/java.base/java/time/OffsetTime.html#compareTo(java.time.OffsetTime)[compareTo](OffsetTime) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/OffsetTime.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* int {java11-javadoc}/java.base/java/time/OffsetTime.html#getHour()[getHour]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/OffsetTime.html#getMinute()[getMinute]() -* int {java11-javadoc}/java.base/java/time/OffsetTime.html#getNano()[getNano]() -* ZoneOffset {java11-javadoc}/java.base/java/time/OffsetTime.html#getOffset()[getOffset]() -* int {java11-javadoc}/java.base/java/time/OffsetTime.html#getSecond()[getSecond]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/OffsetTime.html#isAfter(java.time.OffsetTime)[isAfter](OffsetTime) -* boolean {java11-javadoc}/java.base/java/time/OffsetTime.html#isBefore(java.time.OffsetTime)[isBefore](OffsetTime) -* boolean {java11-javadoc}/java.base/java/time/OffsetTime.html#isEqual(java.time.OffsetTime)[isEqual](OffsetTime) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#minusHours(long)[minusHours](long) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#minusMinutes(long)[minusMinutes](long) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#minusNanos(long)[minusNanos](long) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#minusSeconds(long)[minusSeconds](long) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#plusHours(long)[plusHours](long) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#plusMinutes(long)[plusMinutes](long) -* OffsetTime 
{java11-javadoc}/java.base/java/time/OffsetTime.html#plusNanos(long)[plusNanos](long) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#plusSeconds(long)[plusSeconds](long) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* LocalTime {java11-javadoc}/java.base/java/time/OffsetTime.html#toLocalTime()[toLocalTime]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#truncatedTo(java.time.temporal.TemporalUnit)[truncatedTo](TemporalUnit) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#withHour(int)[withHour](int) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#withMinute(int)[withMinute](int) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#withNano(int)[withNano](int) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#withOffsetSameInstant(java.time.ZoneOffset)[withOffsetSameInstant](ZoneOffset) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#withOffsetSameLocal(java.time.ZoneOffset)[withOffsetSameLocal](ZoneOffset) -* OffsetTime {java11-javadoc}/java.base/java/time/OffsetTime.html#withSecond(int)[withSecond](int) - - -[[painless-api-reference-shared-Period]] -==== Period -* static Period {java11-javadoc}/java.base/java/time/Period.html#ZERO[ZERO] -* static Period {java11-javadoc}/java.base/java/time/Period.html#between(java.time.LocalDate,java.time.LocalDate)[between](LocalDate, LocalDate) -* static Period {java11-javadoc}/java.base/java/time/Period.html#from(java.time.temporal.TemporalAmount)[from](TemporalAmount) -* static Period {java11-javadoc}/java.base/java/time/Period.html#of(int,int,int)[of](int, int, int) -* static Period {java11-javadoc}/java.base/java/time/Period.html#ofDays(int)[ofDays](int) -* static Period {java11-javadoc}/java.base/java/time/Period.html#ofMonths(int)[ofMonths](int) -* static Period {java11-javadoc}/java.base/java/time/Period.html#ofWeeks(int)[ofWeeks](int) -* static Period {java11-javadoc}/java.base/java/time/Period.html#ofYears(int)[ofYears](int) -* static Period {java11-javadoc}/java.base/java/time/Period.html#parse(java.lang.CharSequence)[parse](CharSequence) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#addTo(java.time.temporal.Temporal)[addTo](Temporal) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#get(java.time.temporal.TemporalUnit)[get](TemporalUnit) -* IsoChronology {java11-javadoc}/java.base/java/time/Period.html#getChronology()[getChronology]() -* int {java11-javadoc}/java.base/java/time/Period.html#getDays()[getDays]() -* int {java11-javadoc}/java.base/java/time/Period.html#getMonths()[getMonths]() -* List 
{java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#getUnits()[getUnits]() -* int {java11-javadoc}/java.base/java/time/Period.html#getYears()[getYears]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#isNegative()[isNegative]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#isZero()[isZero]() -* Period {java11-javadoc}/java.base/java/time/Period.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* Period {java11-javadoc}/java.base/java/time/Period.html#minusDays(long)[minusDays](long) -* Period {java11-javadoc}/java.base/java/time/Period.html#minusMonths(long)[minusMonths](long) -* Period {java11-javadoc}/java.base/java/time/Period.html#minusYears(long)[minusYears](long) -* Period {java11-javadoc}/java.base/java/time/Period.html#multipliedBy(int)[multipliedBy](int) -* Period {java11-javadoc}/java.base/java/time/Period.html#negated()[negated]() -* Period {java11-javadoc}/java.base/java/time/Period.html#normalized()[normalized]() -* Period {java11-javadoc}/java.base/java/time/Period.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* Period {java11-javadoc}/java.base/java/time/Period.html#plusDays(long)[plusDays](long) -* Period {java11-javadoc}/java.base/java/time/Period.html#plusMonths(long)[plusMonths](long) -* Period {java11-javadoc}/java.base/java/time/Period.html#plusYears(long)[plusYears](long) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#subtractFrom(java.time.temporal.Temporal)[subtractFrom](Temporal) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#toString()[toString]() -* long {java11-javadoc}/java.base/java/time/Period.html#toTotalMonths()[toTotalMonths]() -* Period {java11-javadoc}/java.base/java/time/Period.html#withDays(int)[withDays](int) -* Period {java11-javadoc}/java.base/java/time/Period.html#withMonths(int)[withMonths](int) -* Period {java11-javadoc}/java.base/java/time/Period.html#withYears(int)[withYears](int) - - -[[painless-api-reference-shared-Year]] -==== Year -* static int {java11-javadoc}/java.base/java/time/Year.html#MAX_VALUE[MAX_VALUE] -* static int {java11-javadoc}/java.base/java/time/Year.html#MIN_VALUE[MIN_VALUE] -* static Year {java11-javadoc}/java.base/java/time/Year.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static boolean {java11-javadoc}/java.base/java/time/Year.html#isLeap(long)[isLeap](long) -* static Year {java11-javadoc}/java.base/java/time/Year.html#of(int)[of](int) -* static Year {java11-javadoc}/java.base/java/time/Year.html#parse(java.lang.CharSequence)[parse](CharSequence) -* static Year {java11-javadoc}/java.base/java/time/Year.html#parse(java.lang.CharSequence,java.time.format.DateTimeFormatter)[parse](CharSequence, DateTimeFormatter) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* LocalDate {java11-javadoc}/java.base/java/time/Year.html#atDay(int)[atDay](int) -* YearMonth {java11-javadoc}/java.base/java/time/Year.html#atMonth(int)[atMonth](int) -* LocalDate {java11-javadoc}/java.base/java/time/Year.html#atMonthDay(java.time.MonthDay)[atMonthDay](MonthDay) -* int {java11-javadoc}/java.base/java/time/Year.html#compareTo(java.time.Year)[compareTo](Year) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null 
{java11-javadoc}/java.base/java/time/Year.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/Year.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/Year.html#isAfter(java.time.Year)[isAfter](Year) -* boolean {java11-javadoc}/java.base/java/time/Year.html#isLeap()[isLeap]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* boolean {java11-javadoc}/java.base/java/time/Year.html#isValidMonthDay(java.time.MonthDay)[isValidMonthDay](MonthDay) -* int {java11-javadoc}/java.base/java/time/Year.html#length()[length]() -* Year {java11-javadoc}/java.base/java/time/Year.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* Year {java11-javadoc}/java.base/java/time/Year.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* Year {java11-javadoc}/java.base/java/time/Year.html#minusYears(long)[minusYears](long) -* Year {java11-javadoc}/java.base/java/time/Year.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* Year {java11-javadoc}/java.base/java/time/Year.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* Year {java11-javadoc}/java.base/java/time/Year.html#plusYears(long)[plusYears](long) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* Year {java11-javadoc}/java.base/java/time/Year.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* Year {java11-javadoc}/java.base/java/time/Year.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) - - -[[painless-api-reference-shared-YearMonth]] -==== YearMonth -* static YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#of(int,int)[of](int, int) -* static YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#parse(java.lang.CharSequence)[parse](CharSequence) -* static YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#parse(java.lang.CharSequence,java.time.format.DateTimeFormatter)[parse](CharSequence, DateTimeFormatter) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* LocalDate {java11-javadoc}/java.base/java/time/YearMonth.html#atDay(int)[atDay](int) -* LocalDate {java11-javadoc}/java.base/java/time/YearMonth.html#atEndOfMonth()[atEndOfMonth]() -* int {java11-javadoc}/java.base/java/time/YearMonth.html#compareTo(java.time.YearMonth)[compareTo](YearMonth) -* 
boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/YearMonth.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* Month {java11-javadoc}/java.base/java/time/YearMonth.html#getMonth()[getMonth]() -* int {java11-javadoc}/java.base/java/time/YearMonth.html#getMonthValue()[getMonthValue]() -* int {java11-javadoc}/java.base/java/time/YearMonth.html#getYear()[getYear]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/YearMonth.html#isAfter(java.time.YearMonth)[isAfter](YearMonth) -* boolean {java11-javadoc}/java.base/java/time/YearMonth.html#isBefore(java.time.YearMonth)[isBefore](YearMonth) -* boolean {java11-javadoc}/java.base/java/time/YearMonth.html#isLeapYear()[isLeapYear]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* boolean {java11-javadoc}/java.base/java/time/YearMonth.html#isValidDay(int)[isValidDay](int) -* int {java11-javadoc}/java.base/java/time/YearMonth.html#lengthOfMonth()[lengthOfMonth]() -* int {java11-javadoc}/java.base/java/time/YearMonth.html#lengthOfYear()[lengthOfYear]() -* YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#minusMonths(long)[minusMonths](long) -* YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#minusYears(long)[minusYears](long) -* YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#plusMonths(long)[plusMonths](long) -* YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#plusYears(long)[plusYears](long) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) -* YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#withMonth(int)[withMonth](int) -* YearMonth {java11-javadoc}/java.base/java/time/YearMonth.html#withYear(int)[withYear](int) - - -[[painless-api-reference-shared-ZoneId]] -==== ZoneId 
-* static Map {java11-javadoc}/java.base/java/time/ZoneId.html#SHORT_IDS[SHORT_IDS] -* static ZoneId {java11-javadoc}/java.base/java/time/ZoneId.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static Set {java11-javadoc}/java.base/java/time/ZoneId.html#getAvailableZoneIds()[getAvailableZoneIds]() -* static ZoneId {java11-javadoc}/java.base/java/time/ZoneId.html#of(java.lang.String)[of](null) -* static ZoneId {java11-javadoc}/java.base/java/time/ZoneId.html#of(java.lang.String,java.util.Map)[of](null, Map) -* static ZoneId {java11-javadoc}/java.base/java/time/ZoneId.html#ofOffset(java.lang.String,java.time.ZoneOffset)[ofOffset](null, ZoneOffset) -* static ZoneId {java11-javadoc}/java.base/java/time/ZoneId.html#systemDefault()[systemDefault]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/ZoneId.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* null {java11-javadoc}/java.base/java/time/ZoneId.html#getId()[getId]() -* ZoneRules {java11-javadoc}/java.base/java/time/ZoneId.html#getRules()[getRules]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* ZoneId {java11-javadoc}/java.base/java/time/ZoneId.html#normalized()[normalized]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ZoneOffset]] -==== ZoneOffset -* static ZoneOffset {java11-javadoc}/java.base/java/time/ZoneOffset.html#MAX[MAX] -* static ZoneOffset {java11-javadoc}/java.base/java/time/ZoneOffset.html#MIN[MIN] -* static ZoneOffset {java11-javadoc}/java.base/java/time/ZoneOffset.html#UTC[UTC] -* static ZoneOffset {java11-javadoc}/java.base/java/time/ZoneOffset.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static ZoneOffset {java11-javadoc}/java.base/java/time/ZoneOffset.html#of(java.lang.String)[of](null) -* static ZoneOffset {java11-javadoc}/java.base/java/time/ZoneOffset.html#ofHours(int)[ofHours](int) -* static ZoneOffset {java11-javadoc}/java.base/java/time/ZoneOffset.html#ofHoursMinutes(int,int)[ofHoursMinutes](int, int) -* static ZoneOffset {java11-javadoc}/java.base/java/time/ZoneOffset.html#ofHoursMinutesSeconds(int,int,int)[ofHoursMinutesSeconds](int, int, int) -* static ZoneOffset {java11-javadoc}/java.base/java/time/ZoneOffset.html#ofTotalSeconds(int)[ofTotalSeconds](int) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* int {java11-javadoc}/java.base/java/lang/Comparable.html#compareTo(java.lang.Object)[compareTo](def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* null {java11-javadoc}/java.base/java/time/ZoneId.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* null {java11-javadoc}/java.base/java/time/ZoneId.html#getId()[getId]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* ZoneRules {java11-javadoc}/java.base/java/time/ZoneId.html#getRules()[getRules]() -* int {java11-javadoc}/java.base/java/time/ZoneOffset.html#getTotalSeconds()[getTotalSeconds]() -* int 
{java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* ZoneId {java11-javadoc}/java.base/java/time/ZoneId.html#normalized()[normalized]() -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ZonedDateTime]] -==== ZonedDateTime -* static ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#of(java.time.LocalDateTime,java.time.ZoneId)[of](LocalDateTime, ZoneId) -* static ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#of(java.time.LocalDate,java.time.LocalTime,java.time.ZoneId)[of](LocalDate, LocalTime, ZoneId) -* static ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#of(int,int,int,int,int,int,int,java.time.ZoneId)[of](int, int, int, int, int, int, int, ZoneId) -* static ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#ofInstant(java.time.Instant,java.time.ZoneId)[ofInstant](Instant, ZoneId) -* static ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#ofInstant(java.time.LocalDateTime,java.time.ZoneOffset,java.time.ZoneId)[ofInstant](LocalDateTime, ZoneOffset, ZoneId) -* static ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#ofLocal(java.time.LocalDateTime,java.time.ZoneId,java.time.ZoneOffset)[ofLocal](LocalDateTime, ZoneId, ZoneOffset) -* static ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#ofStrict(java.time.LocalDateTime,java.time.ZoneOffset,java.time.ZoneId)[ofStrict](LocalDateTime, ZoneOffset, ZoneId) -* static ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#parse(java.lang.CharSequence)[parse](CharSequence) -* static ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#parse(java.lang.CharSequence,java.time.format.DateTimeFormatter)[parse](CharSequence, DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#compareTo(java.time.chrono.ChronoZonedDateTime)[compareTo](ChronoZonedDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* Chronology {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#getChronology()[getChronology]() -* int {java11-javadoc}/java.base/java/time/ZonedDateTime.html#getDayOfMonth()[getDayOfMonth]() -* DayOfWeek {java11-javadoc}/java.base/java/time/ZonedDateTime.html#getDayOfWeek()[getDayOfWeek]() -* int {java11-javadoc}/java.base/java/time/ZonedDateTime.html#getDayOfYear()[getDayOfYear]() -* int {java11-javadoc}/java.base/java/time/ZonedDateTime.html#getHour()[getHour]() -* long 
{java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/ZonedDateTime.html#getMinute()[getMinute]() -* Month {java11-javadoc}/java.base/java/time/ZonedDateTime.html#getMonth()[getMonth]() -* int {java11-javadoc}/java.base/java/time/ZonedDateTime.html#getMonthValue()[getMonthValue]() -* int {java11-javadoc}/java.base/java/time/ZonedDateTime.html#getNano()[getNano]() -* ZoneOffset {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#getOffset()[getOffset]() -* int {java11-javadoc}/java.base/java/time/ZonedDateTime.html#getSecond()[getSecond]() -* int {java11-javadoc}/java.base/java/time/ZonedDateTime.html#getYear()[getYear]() -* ZoneId {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#getZone()[getZone]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#isAfter(java.time.chrono.ChronoZonedDateTime)[isAfter](ChronoZonedDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#isBefore(java.time.chrono.ChronoZonedDateTime)[isBefore](ChronoZonedDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#isEqual(java.time.chrono.ChronoZonedDateTime)[isEqual](ChronoZonedDateTime) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#minusDays(long)[minusDays](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#minusHours(long)[minusHours](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#minusMinutes(long)[minusMinutes](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#minusMonths(long)[minusMonths](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#minusNanos(long)[minusNanos](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#minusSeconds(long)[minusSeconds](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#minusWeeks(long)[minusWeeks](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#minusYears(long)[minusYears](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#plusDays(long)[plusDays](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#plusHours(long)[plusHours](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#plusMinutes(long)[plusMinutes](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#plusMonths(long)[plusMonths](long) -* ZonedDateTime 
{java11-javadoc}/java.base/java/time/ZonedDateTime.html#plusNanos(long)[plusNanos](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#plusSeconds(long)[plusSeconds](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#plusWeeks(long)[plusWeeks](long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#plusYears(long)[plusYears](long) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toEpochSecond()[toEpochSecond]() -* Instant {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toInstant()[toInstant]() -* LocalDate {java11-javadoc}/java.base/java/time/ZonedDateTime.html#toLocalDate()[toLocalDate]() -* LocalDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#toLocalDateTime()[toLocalDateTime]() -* LocalTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toLocalTime()[toLocalTime]() -* OffsetDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#toOffsetDateTime()[toOffsetDateTime]() -* null {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toString()[toString]() -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#truncatedTo(java.time.temporal.TemporalUnit)[truncatedTo](TemporalUnit) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withDayOfMonth(int)[withDayOfMonth](int) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withDayOfYear(int)[withDayOfYear](int) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withEarlierOffsetAtOverlap()[withEarlierOffsetAtOverlap]() -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withFixedOffsetZone()[withFixedOffsetZone]() -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withHour(int)[withHour](int) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withLaterOffsetAtOverlap()[withLaterOffsetAtOverlap]() -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withMinute(int)[withMinute](int) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withMonth(int)[withMonth](int) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withNano(int)[withNano](int) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withSecond(int)[withSecond](int) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withYear(int)[withYear](int) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withZoneSameInstant(java.time.ZoneId)[withZoneSameInstant](ZoneId) -* ZonedDateTime {java11-javadoc}/java.base/java/time/ZonedDateTime.html#withZoneSameLocal(java.time.ZoneId)[withZoneSameLocal](ZoneId) - - 
-[role="exclude",id="painless-api-reference-shared-java-time-chrono"] -=== Shared API for package java.time.chrono -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-shared-AbstractChronology]] -==== AbstractChronology -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#compareTo(java.time.chrono.Chronology)[compareTo](Chronology) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#date(java.time.temporal.TemporalAccessor)[date](TemporalAccessor) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#date(int,int,int)[date](int, int, int) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#date(java.time.chrono.Era,int,int,int)[date](Era, int, int, int) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#dateEpochDay(long)[dateEpochDay](long) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#dateYearDay(int,int)[dateYearDay](int, int) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#dateYearDay(java.time.chrono.Era,int,int)[dateYearDay](Era, int, int) -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#equals(java.lang.Object)[equals](Object) -* Era {java11-javadoc}/java.base/java/time/chrono/Chronology.html#eraOf(int)[eraOf](int) -* List {java11-javadoc}/java.base/java/time/chrono/Chronology.html#eras()[eras]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getCalendarType()[getCalendarType]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getId()[getId]() -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#isLeapYear(long)[isLeapYear](long) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#localDateTime(java.time.temporal.TemporalAccessor)[localDateTime](TemporalAccessor) -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/Chronology.html#period(int,int,int)[period](int, int, int) -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#prolepticYear(java.time.chrono.Era,int)[prolepticYear](Era, int) -* ValueRange {java11-javadoc}/java.base/java/time/chrono/Chronology.html#range(java.time.temporal.ChronoField)[range](ChronoField) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#resolveDate(java.util.Map,java.time.format.ResolverStyle)[resolveDate](Map, ResolverStyle) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#toString()[toString]() -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#zonedDateTime(java.time.temporal.TemporalAccessor)[zonedDateTime](TemporalAccessor) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#zonedDateTime(java.time.Instant,java.time.ZoneId)[zonedDateTime](Instant, ZoneId) - - -[[painless-api-reference-shared-ChronoLocalDate]] -==== ChronoLocalDate -* static ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static Comparator {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#timeLineOrder()[timeLineOrder]() -* Temporal 
{java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#atTime(java.time.LocalTime)[atTime](LocalTime) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#compareTo(java.time.chrono.ChronoLocalDate)[compareTo](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* Chronology {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#getChronology()[getChronology]() -* Era {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#getEra()[getEra]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isAfter(java.time.chrono.ChronoLocalDate)[isAfter](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isBefore(java.time.chrono.ChronoLocalDate)[isBefore](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isEqual(java.time.chrono.ChronoLocalDate)[isEqual](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isLeapYear()[isLeapYear]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#lengthOfMonth()[lengthOfMonth]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#lengthOfYear()[lengthOfYear]() -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#toEpochDay()[toEpochDay]() -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#toString()[toString]() -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#until(java.time.chrono.ChronoLocalDate)[until](ChronoLocalDate) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* ChronoLocalDate 
{java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) - - -[[painless-api-reference-shared-ChronoLocalDateTime]] -==== ChronoLocalDateTime -* static ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static Comparator {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#timeLineOrder()[timeLineOrder]() -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#atZone(java.time.ZoneId)[atZone](ZoneId) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#compareTo(java.time.chrono.ChronoLocalDateTime)[compareTo](ChronoLocalDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* Chronology {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#getChronology()[getChronology]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#isAfter(java.time.chrono.ChronoLocalDateTime)[isAfter](ChronoLocalDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#isBefore(java.time.chrono.ChronoLocalDateTime)[isBefore](ChronoLocalDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#isEqual(java.time.chrono.ChronoLocalDateTime)[isEqual](ChronoLocalDateTime) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#toEpochSecond(java.time.ZoneOffset)[toEpochSecond](ZoneOffset) -* Instant 
{java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#toInstant(java.time.ZoneOffset)[toInstant](ZoneOffset) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#toLocalDate()[toLocalDate]() -* LocalTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#toLocalTime()[toLocalTime]() -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#toString()[toString]() -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDateTime.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) - - -[[painless-api-reference-shared-ChronoPeriod]] -==== ChronoPeriod -* static ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#between(java.time.chrono.ChronoLocalDate,java.time.chrono.ChronoLocalDate)[between](ChronoLocalDate, ChronoLocalDate) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#addTo(java.time.temporal.Temporal)[addTo](Temporal) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#get(java.time.temporal.TemporalUnit)[get](TemporalUnit) -* Chronology {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#getChronology()[getChronology]() -* List {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#getUnits()[getUnits]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#isNegative()[isNegative]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#isZero()[isZero]() -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#multipliedBy(int)[multipliedBy](int) -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#negated()[negated]() -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#normalized()[normalized]() -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#subtractFrom(java.time.temporal.Temporal)[subtractFrom](Temporal) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoPeriod.html#toString()[toString]() - - -[[painless-api-reference-shared-ChronoZonedDateTime]] -==== ChronoZonedDateTime -* static ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static Comparator {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#timeLineOrder()[timeLineOrder]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#compareTo(java.time.chrono.ChronoZonedDateTime)[compareTo](ChronoZonedDateTime) -* boolean 
{java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* Chronology {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#getChronology()[getChronology]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* ZoneOffset {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#getOffset()[getOffset]() -* ZoneId {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#getZone()[getZone]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#isAfter(java.time.chrono.ChronoZonedDateTime)[isAfter](ChronoZonedDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#isBefore(java.time.chrono.ChronoZonedDateTime)[isBefore](ChronoZonedDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#isEqual(java.time.chrono.ChronoZonedDateTime)[isEqual](ChronoZonedDateTime) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toEpochSecond()[toEpochSecond]() -* Instant {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toInstant()[toInstant]() -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toLocalDate()[toLocalDate]() -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toLocalDateTime()[toLocalDateTime]() -* LocalTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toLocalTime()[toLocalTime]() -* null {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toString()[toString]() -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* ChronoZonedDateTime 
{java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#withEarlierOffsetAtOverlap()[withEarlierOffsetAtOverlap]() -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#withLaterOffsetAtOverlap()[withLaterOffsetAtOverlap]() -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#withZoneSameInstant(java.time.ZoneId)[withZoneSameInstant](ZoneId) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#withZoneSameLocal(java.time.ZoneId)[withZoneSameLocal](ZoneId) - - -[[painless-api-reference-shared-Chronology]] -==== Chronology -* static Chronology {java11-javadoc}/java.base/java/time/chrono/Chronology.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static Set {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getAvailableChronologies()[getAvailableChronologies]() -* static Chronology {java11-javadoc}/java.base/java/time/chrono/Chronology.html#of(java.lang.String)[of](null) -* static Chronology {java11-javadoc}/java.base/java/time/chrono/Chronology.html#ofLocale(java.util.Locale)[ofLocale](Locale) -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#compareTo(java.time.chrono.Chronology)[compareTo](Chronology) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#date(java.time.temporal.TemporalAccessor)[date](TemporalAccessor) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#date(int,int,int)[date](int, int, int) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#date(java.time.chrono.Era,int,int,int)[date](Era, int, int, int) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#dateEpochDay(long)[dateEpochDay](long) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#dateYearDay(int,int)[dateYearDay](int, int) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#dateYearDay(java.time.chrono.Era,int,int)[dateYearDay](Era, int, int) -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#equals(java.lang.Object)[equals](Object) -* Era {java11-javadoc}/java.base/java/time/chrono/Chronology.html#eraOf(int)[eraOf](int) -* List {java11-javadoc}/java.base/java/time/chrono/Chronology.html#eras()[eras]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getCalendarType()[getCalendarType]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getId()[getId]() -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#isLeapYear(long)[isLeapYear](long) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#localDateTime(java.time.temporal.TemporalAccessor)[localDateTime](TemporalAccessor) -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/Chronology.html#period(int,int,int)[period](int, int, int) -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#prolepticYear(java.time.chrono.Era,int)[prolepticYear](Era, int) -* ValueRange 
{java11-javadoc}/java.base/java/time/chrono/Chronology.html#range(java.time.temporal.ChronoField)[range](ChronoField) -* ChronoLocalDate {java11-javadoc}/java.base/java/time/chrono/Chronology.html#resolveDate(java.util.Map,java.time.format.ResolverStyle)[resolveDate](Map, ResolverStyle) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#toString()[toString]() -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#zonedDateTime(java.time.temporal.TemporalAccessor)[zonedDateTime](TemporalAccessor) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#zonedDateTime(java.time.Instant,java.time.ZoneId)[zonedDateTime](Instant, ZoneId) - - -[[painless-api-reference-shared-Era]] -==== Era -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* null {java11-javadoc}/java.base/java/time/chrono/Era.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/Era.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-HijrahChronology]] -==== HijrahChronology -* static HijrahChronology {java11-javadoc}/java.base/java/time/chrono/HijrahChronology.html#INSTANCE[INSTANCE] -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#compareTo(java.time.chrono.Chronology)[compareTo](Chronology) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahChronology.html#date(java.time.temporal.TemporalAccessor)[date](TemporalAccessor) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahChronology.html#date(int,int,int)[date](int, int, int) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahChronology.html#date(java.time.chrono.Era,int,int,int)[date](Era, int, int, int) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahChronology.html#dateEpochDay(long)[dateEpochDay](long) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahChronology.html#dateYearDay(int,int)[dateYearDay](int, int) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahChronology.html#dateYearDay(java.time.chrono.Era,int,int)[dateYearDay](Era, int, int) -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#equals(java.lang.Object)[equals](Object) -* HijrahEra {java11-javadoc}/java.base/java/time/chrono/HijrahChronology.html#eraOf(int)[eraOf](int) -* List {java11-javadoc}/java.base/java/time/chrono/Chronology.html#eras()[eras]() -* null 
{java11-javadoc}/java.base/java/time/chrono/Chronology.html#getCalendarType()[getCalendarType]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getId()[getId]() -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#isLeapYear(long)[isLeapYear](long) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#localDateTime(java.time.temporal.TemporalAccessor)[localDateTime](TemporalAccessor) -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/Chronology.html#period(int,int,int)[period](int, int, int) -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#prolepticYear(java.time.chrono.Era,int)[prolepticYear](Era, int) -* ValueRange {java11-javadoc}/java.base/java/time/chrono/Chronology.html#range(java.time.temporal.ChronoField)[range](ChronoField) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahChronology.html#resolveDate(java.util.Map,java.time.format.ResolverStyle)[resolveDate](Map, ResolverStyle) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#toString()[toString]() -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#zonedDateTime(java.time.temporal.TemporalAccessor)[zonedDateTime](TemporalAccessor) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#zonedDateTime(java.time.Instant,java.time.ZoneId)[zonedDateTime](Instant, ZoneId) - - -[[painless-api-reference-shared-HijrahDate]] -==== HijrahDate -* static HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahDate.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahDate.html#of(int,int,int)[of](int, int, int) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#atTime(java.time.LocalTime)[atTime](LocalTime) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#compareTo(java.time.chrono.ChronoLocalDate)[compareTo](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* HijrahChronology {java11-javadoc}/java.base/java/time/chrono/HijrahDate.html#getChronology()[getChronology]() -* HijrahEra {java11-javadoc}/java.base/java/time/chrono/HijrahDate.html#getEra()[getEra]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isAfter(java.time.chrono.ChronoLocalDate)[isAfter](ChronoLocalDate) -* boolean 
{java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isBefore(java.time.chrono.ChronoLocalDate)[isBefore](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isEqual(java.time.chrono.ChronoLocalDate)[isEqual](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isLeapYear()[isLeapYear]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#lengthOfMonth()[lengthOfMonth]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#lengthOfYear()[lengthOfYear]() -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahDate.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahDate.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahDate.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahDate.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#toEpochDay()[toEpochDay]() -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#toString()[toString]() -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#until(java.time.chrono.ChronoLocalDate)[until](ChronoLocalDate) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahDate.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahDate.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) -* HijrahDate {java11-javadoc}/java.base/java/time/chrono/HijrahDate.html#withVariant(java.time.chrono.HijrahChronology)[withVariant](HijrahChronology) - - -[[painless-api-reference-shared-HijrahEra]] -==== HijrahEra -* static HijrahEra {java11-javadoc}/java.base/java/time/chrono/HijrahEra.html#AH[AH] -* static HijrahEra {java11-javadoc}/java.base/java/time/chrono/HijrahEra.html#of(int)[of](int) -* static HijrahEra {java11-javadoc}/java.base/java/time/chrono/HijrahEra.html#valueOf(java.lang.String)[valueOf](null) -* static HijrahEra[] {java11-javadoc}/java.base/java/time/chrono/HijrahEra.html#values()[values]() -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* null 
{java11-javadoc}/java.base/java/time/chrono/Era.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/HijrahEra.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IsoChronology]] -==== IsoChronology -* static IsoChronology {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#INSTANCE[INSTANCE] -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#compareTo(java.time.chrono.Chronology)[compareTo](Chronology) -* LocalDate {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#date(java.time.temporal.TemporalAccessor)[date](TemporalAccessor) -* LocalDate {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#date(int,int,int)[date](int, int, int) -* LocalDate {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#date(java.time.chrono.Era,int,int,int)[date](Era, int, int, int) -* LocalDate {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#dateEpochDay(long)[dateEpochDay](long) -* LocalDate {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#dateYearDay(int,int)[dateYearDay](int, int) -* LocalDate {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#dateYearDay(java.time.chrono.Era,int,int)[dateYearDay](Era, int, int) -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#equals(java.lang.Object)[equals](Object) -* IsoEra {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#eraOf(int)[eraOf](int) -* List {java11-javadoc}/java.base/java/time/chrono/Chronology.html#eras()[eras]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getCalendarType()[getCalendarType]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getId()[getId]() -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#isLeapYear(long)[isLeapYear](long) -* LocalDateTime {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#localDateTime(java.time.temporal.TemporalAccessor)[localDateTime](TemporalAccessor) -* Period {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#period(int,int,int)[period](int, int, int) -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#prolepticYear(java.time.chrono.Era,int)[prolepticYear](Era, int) -* ValueRange 
{java11-javadoc}/java.base/java/time/chrono/Chronology.html#range(java.time.temporal.ChronoField)[range](ChronoField) -* LocalDate {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#resolveDate(java.util.Map,java.time.format.ResolverStyle)[resolveDate](Map, ResolverStyle) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#toString()[toString]() -* ZonedDateTime {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#zonedDateTime(java.time.temporal.TemporalAccessor)[zonedDateTime](TemporalAccessor) -* ZonedDateTime {java11-javadoc}/java.base/java/time/chrono/IsoChronology.html#zonedDateTime(java.time.Instant,java.time.ZoneId)[zonedDateTime](Instant, ZoneId) - - -[[painless-api-reference-shared-IsoEra]] -==== IsoEra -* static IsoEra {java11-javadoc}/java.base/java/time/chrono/IsoEra.html#BCE[BCE] -* static IsoEra {java11-javadoc}/java.base/java/time/chrono/IsoEra.html#CE[CE] -* static IsoEra {java11-javadoc}/java.base/java/time/chrono/IsoEra.html#of(int)[of](int) -* static IsoEra {java11-javadoc}/java.base/java/time/chrono/IsoEra.html#valueOf(java.lang.String)[valueOf](null) -* static IsoEra[] {java11-javadoc}/java.base/java/time/chrono/IsoEra.html#values()[values]() -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* null {java11-javadoc}/java.base/java/time/chrono/Era.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/IsoEra.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-JapaneseChronology]] -==== JapaneseChronology -* static JapaneseChronology {java11-javadoc}/java.base/java/time/chrono/JapaneseChronology.html#INSTANCE[INSTANCE] -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#compareTo(java.time.chrono.Chronology)[compareTo](Chronology) -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseChronology.html#date(java.time.temporal.TemporalAccessor)[date](TemporalAccessor) -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseChronology.html#date(int,int,int)[date](int, int, int) -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseChronology.html#date(java.time.chrono.Era,int,int,int)[date](Era, 
int, int, int) -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseChronology.html#dateEpochDay(long)[dateEpochDay](long) -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseChronology.html#dateYearDay(int,int)[dateYearDay](int, int) -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseChronology.html#dateYearDay(java.time.chrono.Era,int,int)[dateYearDay](Era, int, int) -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#equals(java.lang.Object)[equals](Object) -* JapaneseEra {java11-javadoc}/java.base/java/time/chrono/JapaneseChronology.html#eraOf(int)[eraOf](int) -* List {java11-javadoc}/java.base/java/time/chrono/Chronology.html#eras()[eras]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getCalendarType()[getCalendarType]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getId()[getId]() -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#isLeapYear(long)[isLeapYear](long) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#localDateTime(java.time.temporal.TemporalAccessor)[localDateTime](TemporalAccessor) -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/Chronology.html#period(int,int,int)[period](int, int, int) -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#prolepticYear(java.time.chrono.Era,int)[prolepticYear](Era, int) -* ValueRange {java11-javadoc}/java.base/java/time/chrono/Chronology.html#range(java.time.temporal.ChronoField)[range](ChronoField) -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseChronology.html#resolveDate(java.util.Map,java.time.format.ResolverStyle)[resolveDate](Map, ResolverStyle) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#toString()[toString]() -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#zonedDateTime(java.time.temporal.TemporalAccessor)[zonedDateTime](TemporalAccessor) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#zonedDateTime(java.time.Instant,java.time.ZoneId)[zonedDateTime](Instant, ZoneId) - - -[[painless-api-reference-shared-JapaneseDate]] -==== JapaneseDate -* static JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseDate.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseDate.html#of(int,int,int)[of](int, int, int) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#atTime(java.time.LocalTime)[atTime](LocalTime) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#compareTo(java.time.chrono.ChronoLocalDate)[compareTo](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int 
{java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* JapaneseChronology {java11-javadoc}/java.base/java/time/chrono/JapaneseDate.html#getChronology()[getChronology]() -* JapaneseEra {java11-javadoc}/java.base/java/time/chrono/JapaneseDate.html#getEra()[getEra]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isAfter(java.time.chrono.ChronoLocalDate)[isAfter](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isBefore(java.time.chrono.ChronoLocalDate)[isBefore](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isEqual(java.time.chrono.ChronoLocalDate)[isEqual](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isLeapYear()[isLeapYear]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#lengthOfMonth()[lengthOfMonth]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#lengthOfYear()[lengthOfYear]() -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseDate.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseDate.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseDate.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseDate.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#toEpochDay()[toEpochDay]() -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#toString()[toString]() -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#until(java.time.chrono.ChronoLocalDate)[until](ChronoLocalDate) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseDate.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* JapaneseDate {java11-javadoc}/java.base/java/time/chrono/JapaneseDate.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) - - -[[painless-api-reference-shared-JapaneseEra]] -==== JapaneseEra -* static JapaneseEra {java11-javadoc}/java.base/java/time/chrono/JapaneseEra.html#HEISEI[HEISEI] -* static JapaneseEra {java11-javadoc}/java.base/java/time/chrono/JapaneseEra.html#MEIJI[MEIJI] -* static JapaneseEra {java11-javadoc}/java.base/java/time/chrono/JapaneseEra.html#SHOWA[SHOWA] -* static JapaneseEra 
{java11-javadoc}/java.base/java/time/chrono/JapaneseEra.html#TAISHO[TAISHO] -* static JapaneseEra {java11-javadoc}/java.base/java/time/chrono/JapaneseEra.html#of(int)[of](int) -* static JapaneseEra {java11-javadoc}/java.base/java/time/chrono/JapaneseEra.html#valueOf(java.lang.String)[valueOf](null) -* static JapaneseEra[] {java11-javadoc}/java.base/java/time/chrono/JapaneseEra.html#values()[values]() -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* null {java11-javadoc}/java.base/java/time/chrono/Era.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/JapaneseEra.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-MinguoChronology]] -==== MinguoChronology -* static MinguoChronology {java11-javadoc}/java.base/java/time/chrono/MinguoChronology.html#INSTANCE[INSTANCE] -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#compareTo(java.time.chrono.Chronology)[compareTo](Chronology) -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoChronology.html#date(java.time.temporal.TemporalAccessor)[date](TemporalAccessor) -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoChronology.html#date(int,int,int)[date](int, int, int) -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoChronology.html#date(java.time.chrono.Era,int,int,int)[date](Era, int, int, int) -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoChronology.html#dateEpochDay(long)[dateEpochDay](long) -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoChronology.html#dateYearDay(int,int)[dateYearDay](int, int) -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoChronology.html#dateYearDay(java.time.chrono.Era,int,int)[dateYearDay](Era, int, int) -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#equals(java.lang.Object)[equals](Object) -* MinguoEra {java11-javadoc}/java.base/java/time/chrono/MinguoChronology.html#eraOf(int)[eraOf](int) -* List {java11-javadoc}/java.base/java/time/chrono/Chronology.html#eras()[eras]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getCalendarType()[getCalendarType]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getId()[getId]() -* int 
{java11-javadoc}/java.base/java/time/chrono/Chronology.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#isLeapYear(long)[isLeapYear](long) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#localDateTime(java.time.temporal.TemporalAccessor)[localDateTime](TemporalAccessor) -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/Chronology.html#period(int,int,int)[period](int, int, int) -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#prolepticYear(java.time.chrono.Era,int)[prolepticYear](Era, int) -* ValueRange {java11-javadoc}/java.base/java/time/chrono/Chronology.html#range(java.time.temporal.ChronoField)[range](ChronoField) -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoChronology.html#resolveDate(java.util.Map,java.time.format.ResolverStyle)[resolveDate](Map, ResolverStyle) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#toString()[toString]() -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#zonedDateTime(java.time.temporal.TemporalAccessor)[zonedDateTime](TemporalAccessor) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#zonedDateTime(java.time.Instant,java.time.ZoneId)[zonedDateTime](Instant, ZoneId) - - -[[painless-api-reference-shared-MinguoDate]] -==== MinguoDate -* static MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoDate.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoDate.html#of(int,int,int)[of](int, int, int) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#atTime(java.time.LocalTime)[atTime](LocalTime) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#compareTo(java.time.chrono.ChronoLocalDate)[compareTo](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* MinguoChronology {java11-javadoc}/java.base/java/time/chrono/MinguoDate.html#getChronology()[getChronology]() -* MinguoEra {java11-javadoc}/java.base/java/time/chrono/MinguoDate.html#getEra()[getEra]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isAfter(java.time.chrono.ChronoLocalDate)[isAfter](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isBefore(java.time.chrono.ChronoLocalDate)[isBefore](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isEqual(java.time.chrono.ChronoLocalDate)[isEqual](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isLeapYear()[isLeapYear]() -* boolean 
{java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#lengthOfMonth()[lengthOfMonth]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#lengthOfYear()[lengthOfYear]() -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoDate.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoDate.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoDate.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoDate.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#toEpochDay()[toEpochDay]() -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#toString()[toString]() -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#until(java.time.chrono.ChronoLocalDate)[until](ChronoLocalDate) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoDate.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* MinguoDate {java11-javadoc}/java.base/java/time/chrono/MinguoDate.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) - - -[[painless-api-reference-shared-MinguoEra]] -==== MinguoEra -* static MinguoEra {java11-javadoc}/java.base/java/time/chrono/MinguoEra.html#BEFORE_ROC[BEFORE_ROC] -* static MinguoEra {java11-javadoc}/java.base/java/time/chrono/MinguoEra.html#ROC[ROC] -* static MinguoEra {java11-javadoc}/java.base/java/time/chrono/MinguoEra.html#of(int)[of](int) -* static MinguoEra {java11-javadoc}/java.base/java/time/chrono/MinguoEra.html#valueOf(java.lang.String)[valueOf](null) -* static MinguoEra[] {java11-javadoc}/java.base/java/time/chrono/MinguoEra.html#values()[values]() -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* null {java11-javadoc}/java.base/java/time/chrono/Era.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/MinguoEra.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean 
{java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ThaiBuddhistChronology]] -==== ThaiBuddhistChronology -* static ThaiBuddhistChronology {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistChronology.html#INSTANCE[INSTANCE] -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#compareTo(java.time.chrono.Chronology)[compareTo](Chronology) -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistChronology.html#date(java.time.temporal.TemporalAccessor)[date](TemporalAccessor) -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistChronology.html#date(int,int,int)[date](int, int, int) -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistChronology.html#date(java.time.chrono.Era,int,int,int)[date](Era, int, int, int) -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistChronology.html#dateEpochDay(long)[dateEpochDay](long) -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistChronology.html#dateYearDay(int,int)[dateYearDay](int, int) -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistChronology.html#dateYearDay(java.time.chrono.Era,int,int)[dateYearDay](Era, int, int) -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#equals(java.lang.Object)[equals](Object) -* ThaiBuddhistEra {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistChronology.html#eraOf(int)[eraOf](int) -* List {java11-javadoc}/java.base/java/time/chrono/Chronology.html#eras()[eras]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getCalendarType()[getCalendarType]() -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#getId()[getId]() -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/Chronology.html#isLeapYear(long)[isLeapYear](long) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#localDateTime(java.time.temporal.TemporalAccessor)[localDateTime](TemporalAccessor) -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/Chronology.html#period(int,int,int)[period](int, int, int) -* int {java11-javadoc}/java.base/java/time/chrono/Chronology.html#prolepticYear(java.time.chrono.Era,int)[prolepticYear](Era, int) -* ValueRange {java11-javadoc}/java.base/java/time/chrono/Chronology.html#range(java.time.temporal.ChronoField)[range](ChronoField) -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistChronology.html#resolveDate(java.util.Map,java.time.format.ResolverStyle)[resolveDate](Map, ResolverStyle) -* null {java11-javadoc}/java.base/java/time/chrono/Chronology.html#toString()[toString]() -* 
ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#zonedDateTime(java.time.temporal.TemporalAccessor)[zonedDateTime](TemporalAccessor) -* ChronoZonedDateTime {java11-javadoc}/java.base/java/time/chrono/Chronology.html#zonedDateTime(java.time.Instant,java.time.ZoneId)[zonedDateTime](Instant, ZoneId) - - -[[painless-api-reference-shared-ThaiBuddhistDate]] -==== ThaiBuddhistDate -* static ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistDate.html#from(java.time.temporal.TemporalAccessor)[from](TemporalAccessor) -* static ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistDate.html#of(int,int,int)[of](int, int, int) -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* ChronoLocalDateTime {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#atTime(java.time.LocalTime)[atTime](LocalTime) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#compareTo(java.time.chrono.ChronoLocalDate)[compareTo](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* ThaiBuddhistChronology {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistDate.html#getChronology()[getChronology]() -* ThaiBuddhistEra {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistDate.html#getEra()[getEra]() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isAfter(java.time.chrono.ChronoLocalDate)[isAfter](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isBefore(java.time.chrono.ChronoLocalDate)[isBefore](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isEqual(java.time.chrono.ChronoLocalDate)[isEqual](ChronoLocalDate) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#isLeapYear()[isLeapYear]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#lengthOfMonth()[lengthOfMonth]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#lengthOfYear()[lengthOfYear]() -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistDate.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistDate.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistDate.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistDate.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* def 
{java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#toEpochDay()[toEpochDay]() -* null {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#toString()[toString]() -* ChronoPeriod {java11-javadoc}/java.base/java/time/chrono/ChronoLocalDate.html#until(java.time.chrono.ChronoLocalDate)[until](ChronoLocalDate) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistDate.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* ThaiBuddhistDate {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistDate.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) - - -[[painless-api-reference-shared-ThaiBuddhistEra]] -==== ThaiBuddhistEra -* static ThaiBuddhistEra {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistEra.html#BE[BE] -* static ThaiBuddhistEra {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistEra.html#BEFORE_BE[BEFORE_BE] -* static ThaiBuddhistEra {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistEra.html#of(int)[of](int) -* static ThaiBuddhistEra {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistEra.html#valueOf(java.lang.String)[valueOf](null) -* static ThaiBuddhistEra[] {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistEra.html#values()[values]() -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* null {java11-javadoc}/java.base/java/time/chrono/Era.html#getDisplayName(java.time.format.TextStyle,java.util.Locale)[getDisplayName](TextStyle, Locale) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/time/chrono/ThaiBuddhistEra.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-java-time-format"] -=== Shared API for package java.time.format -See the <> for a high-level overview of all packages and classes. 
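For orientation, a minimal sketch of how the formatter classes catalogued in the listings below are typically combined. It is written as plain Java rather than a Painless script body (a Painless script would use the same calls without the class and import scaffolding), and the pattern strings, locale, and dates are arbitrary examples.

[source,java]
----
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.ResolverStyle;
import java.time.temporal.ChronoField;
import java.util.Locale;

public class FormatSketch {
    public static void main(String[] args) {
        // Ready-made constant formatter.
        String iso = LocalDateTime.of(2024, 5, 17, 9, 30)
            .format(DateTimeFormatter.ISO_LOCAL_DATE_TIME);

        // Pattern-based formatter; pattern and locale are illustrative only.
        DateTimeFormatter byPattern = DateTimeFormatter.ofPattern("dd MMM uuuu", Locale.ENGLISH);
        LocalDate parsed = LocalDate.parse("17 May 2024", byPattern);

        // Builder variant: case-insensitive parsing with a defaulted day-of-month.
        DateTimeFormatter yearMonth = new DateTimeFormatterBuilder()
            .parseCaseInsensitive()
            .appendPattern("uuuu-MM")
            .parseDefaulting(ChronoField.DAY_OF_MONTH, 1)
            .toFormatter(Locale.ENGLISH)
            .withResolverStyle(ResolverStyle.STRICT);
        LocalDate firstOfMonth = LocalDate.parse("2024-05", yearMonth);

        System.out.println(iso + " / " + parsed + " / " + firstOfMonth);
    }
}
----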
- -[[painless-api-reference-shared-DateTimeFormatter]] -==== DateTimeFormatter -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#BASIC_ISO_DATE[BASIC_ISO_DATE] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_DATE[ISO_DATE] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_DATE_TIME[ISO_DATE_TIME] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_INSTANT[ISO_INSTANT] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_LOCAL_DATE[ISO_LOCAL_DATE] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_LOCAL_DATE_TIME[ISO_LOCAL_DATE_TIME] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_LOCAL_TIME[ISO_LOCAL_TIME] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_OFFSET_DATE[ISO_OFFSET_DATE] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_OFFSET_DATE_TIME[ISO_OFFSET_DATE_TIME] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_OFFSET_TIME[ISO_OFFSET_TIME] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_ORDINAL_DATE[ISO_ORDINAL_DATE] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_TIME[ISO_TIME] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_WEEK_DATE[ISO_WEEK_DATE] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ISO_ZONED_DATE_TIME[ISO_ZONED_DATE_TIME] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#RFC_1123_DATE_TIME[RFC_1123_DATE_TIME] -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ofLocalizedDate(java.time.format.FormatStyle)[ofLocalizedDate](FormatStyle) -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ofLocalizedDateTime(java.time.format.FormatStyle)[ofLocalizedDateTime](FormatStyle) -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ofLocalizedDateTime(java.time.format.FormatStyle,java.time.format.FormatStyle)[ofLocalizedDateTime](FormatStyle, FormatStyle) -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ofLocalizedTime(java.time.format.FormatStyle)[ofLocalizedTime](FormatStyle) -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ofPattern(java.lang.String)[ofPattern](null) -* static DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#ofPattern(java.lang.String,java.util.Locale)[ofPattern](null, Locale) -* static TemporalQuery {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#parsedExcessDays()[parsedExcessDays]() -* static TemporalQuery {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#parsedLeapSecond()[parsedLeapSecond]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#format(java.time.temporal.TemporalAccessor)[format](TemporalAccessor) -* void 
{java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#formatTo(java.time.temporal.TemporalAccessor,java.lang.Appendable)[formatTo](TemporalAccessor, Appendable) -* Chronology {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#getChronology()[getChronology]() -* DecimalStyle {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#getDecimalStyle()[getDecimalStyle]() -* Locale {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#getLocale()[getLocale]() -* Set {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#getResolverFields()[getResolverFields]() -* ResolverStyle {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#getResolverStyle()[getResolverStyle]() -* ZoneId {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#getZone()[getZone]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* TemporalAccessor {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#parse(java.lang.CharSequence)[parse](CharSequence) -* def {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#parse(java.lang.CharSequence,java.time.temporal.TemporalQuery)[parse](CharSequence, TemporalQuery) -* TemporalAccessor {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#parseBest(java.lang.CharSequence,java.time.temporal.TemporalQuery%5B%5D)[parseBest](CharSequence, TemporalQuery[]) -* TemporalAccessor {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#parseUnresolved(java.lang.CharSequence,java.text.ParsePosition)[parseUnresolved](CharSequence, ParsePosition) -* Format {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#toFormat()[toFormat]() -* Format {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#toFormat(java.time.temporal.TemporalQuery)[toFormat](TemporalQuery) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#withChronology(java.time.chrono.Chronology)[withChronology](Chronology) -* DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#withDecimalStyle(java.time.format.DecimalStyle)[withDecimalStyle](DecimalStyle) -* DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#withLocale(java.util.Locale)[withLocale](Locale) -* DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#withResolverFields(java.util.Set)[withResolverFields](Set) -* DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#withResolverStyle(java.time.format.ResolverStyle)[withResolverStyle](ResolverStyle) -* DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html#withZone(java.time.ZoneId)[withZone](ZoneId) - - -[[painless-api-reference-shared-DateTimeFormatterBuilder]] -==== DateTimeFormatterBuilder -* static null {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#getLocalizedDateTimePattern(java.time.format.FormatStyle,java.time.format.FormatStyle,java.time.chrono.Chronology,java.util.Locale)[getLocalizedDateTimePattern](FormatStyle, FormatStyle, Chronology, Locale) -* {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#()[DateTimeFormatterBuilder]() -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#append(java.time.format.DateTimeFormatter)[append](DateTimeFormatter) -* 
DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendChronologyId()[appendChronologyId]() -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendChronologyText(java.time.format.TextStyle)[appendChronologyText](TextStyle) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendFraction(java.time.temporal.TemporalField,int,int,boolean)[appendFraction](TemporalField, int, int, boolean) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendInstant()[appendInstant]() -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendInstant(int)[appendInstant](int) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendLiteral(java.lang.String)[appendLiteral](null) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendLocalized(java.time.format.FormatStyle,java.time.format.FormatStyle)[appendLocalized](FormatStyle, FormatStyle) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendLocalizedOffset(java.time.format.TextStyle)[appendLocalizedOffset](TextStyle) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendOffset(java.lang.String,java.lang.String)[appendOffset](null, null) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendOffsetId()[appendOffsetId]() -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendOptional(java.time.format.DateTimeFormatter)[appendOptional](DateTimeFormatter) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendPattern(java.lang.String)[appendPattern](null) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendText(java.time.temporal.TemporalField)[appendText](TemporalField) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendText(java.time.temporal.TemporalField,java.time.format.TextStyle)[appendText](TemporalField, TextStyle) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendValue(java.time.temporal.TemporalField)[appendValue](TemporalField) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendValue(java.time.temporal.TemporalField,int)[appendValue](TemporalField, int) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendValue(java.time.temporal.TemporalField,int,int,java.time.format.SignStyle)[appendValue](TemporalField, int, int, SignStyle) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendValueReduced(java.time.temporal.TemporalField,int,int,int)[appendValueReduced](TemporalField, int, int, int) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendZoneId()[appendZoneId]() -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendZoneOrOffsetId()[appendZoneOrOffsetId]() -* DateTimeFormatterBuilder 
{java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendZoneRegionId()[appendZoneRegionId]() -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendZoneText(java.time.format.TextStyle)[appendZoneText](TextStyle) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#appendZoneText(java.time.format.TextStyle,java.util.Set)[appendZoneText](TextStyle, Set) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#optionalEnd()[optionalEnd]() -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#optionalStart()[optionalStart]() -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#padNext(int)[padNext](int) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#padNext(int,char)[padNext](int, char) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#parseCaseInsensitive()[parseCaseInsensitive]() -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#parseCaseSensitive()[parseCaseSensitive]() -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#parseDefaulting(java.time.temporal.TemporalField,long)[parseDefaulting](TemporalField, long) -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#parseLenient()[parseLenient]() -* DateTimeFormatterBuilder {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#parseStrict()[parseStrict]() -* DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#toFormatter()[toFormatter]() -* DateTimeFormatter {java11-javadoc}/java.base/java/time/format/DateTimeFormatterBuilder.html#toFormatter(java.util.Locale)[toFormatter](Locale) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DateTimeParseException]] -==== DateTimeParseException -* {java11-javadoc}/java.base/java/time/format/DateTimeParseException.html#(java.lang.String,java.lang.CharSequence,int)[DateTimeParseException](null, CharSequence, int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/format/DateTimeParseException.html#getErrorIndex()[getErrorIndex]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* null {java11-javadoc}/java.base/java/time/format/DateTimeParseException.html#getParsedString()[getParsedString]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DecimalStyle]] -==== DecimalStyle -* static DecimalStyle {java11-javadoc}/java.base/java/time/format/DecimalStyle.html#STANDARD[STANDARD] -* static Set 
{java11-javadoc}/java.base/java/time/format/DecimalStyle.html#getAvailableLocales()[getAvailableLocales]() -* static DecimalStyle {java11-javadoc}/java.base/java/time/format/DecimalStyle.html#of(java.util.Locale)[of](Locale) -* static DecimalStyle {java11-javadoc}/java.base/java/time/format/DecimalStyle.html#ofDefaultLocale()[ofDefaultLocale]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* char {java11-javadoc}/java.base/java/time/format/DecimalStyle.html#getDecimalSeparator()[getDecimalSeparator]() -* char {java11-javadoc}/java.base/java/time/format/DecimalStyle.html#getNegativeSign()[getNegativeSign]() -* char {java11-javadoc}/java.base/java/time/format/DecimalStyle.html#getPositiveSign()[getPositiveSign]() -* char {java11-javadoc}/java.base/java/time/format/DecimalStyle.html#getZeroDigit()[getZeroDigit]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* DecimalStyle {java11-javadoc}/java.base/java/time/format/DecimalStyle.html#withDecimalSeparator(char)[withDecimalSeparator](char) -* DecimalStyle {java11-javadoc}/java.base/java/time/format/DecimalStyle.html#withNegativeSign(char)[withNegativeSign](char) -* DecimalStyle {java11-javadoc}/java.base/java/time/format/DecimalStyle.html#withPositiveSign(char)[withPositiveSign](char) -* DecimalStyle {java11-javadoc}/java.base/java/time/format/DecimalStyle.html#withZeroDigit(char)[withZeroDigit](char) - - -[[painless-api-reference-shared-FormatStyle]] -==== FormatStyle -* static FormatStyle {java11-javadoc}/java.base/java/time/format/FormatStyle.html#FULL[FULL] -* static FormatStyle {java11-javadoc}/java.base/java/time/format/FormatStyle.html#LONG[LONG] -* static FormatStyle {java11-javadoc}/java.base/java/time/format/FormatStyle.html#MEDIUM[MEDIUM] -* static FormatStyle {java11-javadoc}/java.base/java/time/format/FormatStyle.html#SHORT[SHORT] -* static FormatStyle {java11-javadoc}/java.base/java/time/format/FormatStyle.html#valueOf(java.lang.String)[valueOf](null) -* static FormatStyle[] {java11-javadoc}/java.base/java/time/format/FormatStyle.html#values()[values]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ResolverStyle]] -==== ResolverStyle -* static ResolverStyle {java11-javadoc}/java.base/java/time/format/ResolverStyle.html#LENIENT[LENIENT] -* static ResolverStyle {java11-javadoc}/java.base/java/time/format/ResolverStyle.html#SMART[SMART] -* static ResolverStyle {java11-javadoc}/java.base/java/time/format/ResolverStyle.html#STRICT[STRICT] -* static ResolverStyle {java11-javadoc}/java.base/java/time/format/ResolverStyle.html#valueOf(java.lang.String)[valueOf](null) -* static ResolverStyle[] {java11-javadoc}/java.base/java/time/format/ResolverStyle.html#values()[values]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int 
{java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-SignStyle]] -==== SignStyle -* static SignStyle {java11-javadoc}/java.base/java/time/format/SignStyle.html#ALWAYS[ALWAYS] -* static SignStyle {java11-javadoc}/java.base/java/time/format/SignStyle.html#EXCEEDS_PAD[EXCEEDS_PAD] -* static SignStyle {java11-javadoc}/java.base/java/time/format/SignStyle.html#NEVER[NEVER] -* static SignStyle {java11-javadoc}/java.base/java/time/format/SignStyle.html#NORMAL[NORMAL] -* static SignStyle {java11-javadoc}/java.base/java/time/format/SignStyle.html#NOT_NEGATIVE[NOT_NEGATIVE] -* static SignStyle {java11-javadoc}/java.base/java/time/format/SignStyle.html#valueOf(java.lang.String)[valueOf](null) -* static SignStyle[] {java11-javadoc}/java.base/java/time/format/SignStyle.html#values()[values]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-TextStyle]] -==== TextStyle -* static TextStyle {java11-javadoc}/java.base/java/time/format/TextStyle.html#FULL[FULL] -* static TextStyle {java11-javadoc}/java.base/java/time/format/TextStyle.html#FULL_STANDALONE[FULL_STANDALONE] -* static TextStyle {java11-javadoc}/java.base/java/time/format/TextStyle.html#NARROW[NARROW] -* static TextStyle {java11-javadoc}/java.base/java/time/format/TextStyle.html#NARROW_STANDALONE[NARROW_STANDALONE] -* static TextStyle {java11-javadoc}/java.base/java/time/format/TextStyle.html#SHORT[SHORT] -* static TextStyle {java11-javadoc}/java.base/java/time/format/TextStyle.html#SHORT_STANDALONE[SHORT_STANDALONE] -* static TextStyle {java11-javadoc}/java.base/java/time/format/TextStyle.html#valueOf(java.lang.String)[valueOf](null) -* static TextStyle[] {java11-javadoc}/java.base/java/time/format/TextStyle.html#values()[values]() -* TextStyle {java11-javadoc}/java.base/java/time/format/TextStyle.html#asNormal()[asNormal]() -* TextStyle {java11-javadoc}/java.base/java/time/format/TextStyle.html#asStandalone()[asStandalone]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/format/TextStyle.html#isStandalone()[isStandalone]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-java-time-temporal"] -=== Shared API for package java.time.temporal -See the <> for a high-level overview of all packages and classes. 
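As a rough illustration of the field and unit types catalogued below (ChronoField, ChronoUnit, IsoFields), a short plain-Java sketch; the dates and amounts are arbitrary, and the same calls apply unchanged from a Painless script.

[source,java]
----
import java.time.LocalDateTime;
import java.time.temporal.ChronoField;
import java.time.temporal.ChronoUnit;
import java.time.temporal.IsoFields;
import java.time.temporal.Temporal;
import java.time.temporal.ValueRange;

public class TemporalSketch {
    public static void main(String[] args) {
        LocalDateTime start = LocalDateTime.of(2024, 1, 31, 8, 0);

        // Field-based reads via the TemporalAccessor methods listed in this section.
        int hour = start.get(ChronoField.HOUR_OF_DAY);
        int quarter = start.get(IsoFields.QUARTER_OF_YEAR);
        ValueRange daysInMonth = start.range(ChronoField.DAY_OF_MONTH); // 1 - 31 for January

        // Unit-based arithmetic and distance.
        Temporal later = start.plus(3, ChronoUnit.WEEKS);
        long hours = ChronoUnit.HOURS.between(start, later);

        System.out.println(hour + " q" + quarter + " " + daysInMonth + " " + later + " " + hours + "h");
    }
}
----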
- -[[painless-api-reference-shared-ChronoField]] -==== ChronoField -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#ALIGNED_DAY_OF_WEEK_IN_MONTH[ALIGNED_DAY_OF_WEEK_IN_MONTH] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#ALIGNED_DAY_OF_WEEK_IN_YEAR[ALIGNED_DAY_OF_WEEK_IN_YEAR] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#ALIGNED_WEEK_OF_MONTH[ALIGNED_WEEK_OF_MONTH] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#ALIGNED_WEEK_OF_YEAR[ALIGNED_WEEK_OF_YEAR] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#AMPM_OF_DAY[AMPM_OF_DAY] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#CLOCK_HOUR_OF_AMPM[CLOCK_HOUR_OF_AMPM] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#CLOCK_HOUR_OF_DAY[CLOCK_HOUR_OF_DAY] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#DAY_OF_MONTH[DAY_OF_MONTH] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#DAY_OF_WEEK[DAY_OF_WEEK] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#DAY_OF_YEAR[DAY_OF_YEAR] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#EPOCH_DAY[EPOCH_DAY] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#ERA[ERA] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#HOUR_OF_AMPM[HOUR_OF_AMPM] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#HOUR_OF_DAY[HOUR_OF_DAY] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#INSTANT_SECONDS[INSTANT_SECONDS] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#MICRO_OF_DAY[MICRO_OF_DAY] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#MICRO_OF_SECOND[MICRO_OF_SECOND] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#MILLI_OF_DAY[MILLI_OF_DAY] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#MILLI_OF_SECOND[MILLI_OF_SECOND] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#MINUTE_OF_DAY[MINUTE_OF_DAY] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#MINUTE_OF_HOUR[MINUTE_OF_HOUR] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#MONTH_OF_YEAR[MONTH_OF_YEAR] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#NANO_OF_DAY[NANO_OF_DAY] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#NANO_OF_SECOND[NANO_OF_SECOND] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#OFFSET_SECONDS[OFFSET_SECONDS] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#PROLEPTIC_MONTH[PROLEPTIC_MONTH] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#SECOND_OF_DAY[SECOND_OF_DAY] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#SECOND_OF_MINUTE[SECOND_OF_MINUTE] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#YEAR[YEAR] -* static ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#YEAR_OF_ERA[YEAR_OF_ERA] -* static 
ChronoField {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#valueOf(java.lang.String)[valueOf](null) -* static ChronoField[] {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#values()[values]() -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#adjustInto(java.time.temporal.Temporal,long)[adjustInto](Temporal, long) -* int {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#checkValidIntValue(long)[checkValidIntValue](long) -* long {java11-javadoc}/java.base/java/time/temporal/ChronoField.html#checkValidValue(long)[checkValidValue](long) -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* TemporalUnit {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#getBaseUnit()[getBaseUnit]() -* null {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#getDisplayName(java.util.Locale)[getDisplayName](Locale) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#getFrom(java.time.temporal.TemporalAccessor)[getFrom](TemporalAccessor) -* TemporalUnit {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#getRangeUnit()[getRangeUnit]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#isDateBased()[isDateBased]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#isSupportedBy(java.time.temporal.TemporalAccessor)[isSupportedBy](TemporalAccessor) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#isTimeBased()[isTimeBased]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#range()[range]() -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#rangeRefinedBy(java.time.temporal.TemporalAccessor)[rangeRefinedBy](TemporalAccessor) -* TemporalAccessor {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#resolve(java.util.Map,java.time.temporal.TemporalAccessor,java.time.format.ResolverStyle)[resolve](Map, TemporalAccessor, ResolverStyle) -* null {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#toString()[toString]() - - -[[painless-api-reference-shared-ChronoUnit]] -==== ChronoUnit -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#CENTURIES[CENTURIES] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#DAYS[DAYS] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#DECADES[DECADES] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#ERAS[ERAS] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#FOREVER[FOREVER] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#HALF_DAYS[HALF_DAYS] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#HOURS[HOURS] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#MICROS[MICROS] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#MILLENNIA[MILLENNIA] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#MILLIS[MILLIS] -* static 
ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#MINUTES[MINUTES] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#MONTHS[MONTHS] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#NANOS[NANOS] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#SECONDS[SECONDS] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#WEEKS[WEEKS] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#YEARS[YEARS] -* static ChronoUnit {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#valueOf(java.lang.String)[valueOf](null) -* static ChronoUnit[] {java11-javadoc}/java.base/java/time/temporal/ChronoUnit.html#values()[values]() -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#addTo(java.time.temporal.Temporal,long)[addTo](Temporal, long) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#between(java.time.temporal.Temporal,java.time.temporal.Temporal)[between](Temporal, Temporal) -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* Duration {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#getDuration()[getDuration]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#isDateBased()[isDateBased]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#isDurationEstimated()[isDurationEstimated]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#isSupportedBy(java.time.temporal.Temporal)[isSupportedBy](Temporal) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#isTimeBased()[isTimeBased]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* null {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#toString()[toString]() - - -[[painless-api-reference-shared-IsoFields]] -==== IsoFields -* static TemporalField {java11-javadoc}/java.base/java/time/temporal/IsoFields.html#DAY_OF_QUARTER[DAY_OF_QUARTER] -* static TemporalField {java11-javadoc}/java.base/java/time/temporal/IsoFields.html#QUARTER_OF_YEAR[QUARTER_OF_YEAR] -* static TemporalUnit {java11-javadoc}/java.base/java/time/temporal/IsoFields.html#QUARTER_YEARS[QUARTER_YEARS] -* static TemporalField {java11-javadoc}/java.base/java/time/temporal/IsoFields.html#WEEK_BASED_YEAR[WEEK_BASED_YEAR] -* static TemporalUnit {java11-javadoc}/java.base/java/time/temporal/IsoFields.html#WEEK_BASED_YEARS[WEEK_BASED_YEARS] -* static TemporalField {java11-javadoc}/java.base/java/time/temporal/IsoFields.html#WEEK_OF_WEEK_BASED_YEAR[WEEK_OF_WEEK_BASED_YEAR] -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-JulianFields]] -==== JulianFields -* static TemporalField {java11-javadoc}/java.base/java/time/temporal/JulianFields.html#JULIAN_DAY[JULIAN_DAY] -* static TemporalField 
{java11-javadoc}/java.base/java/time/temporal/JulianFields.html#MODIFIED_JULIAN_DAY[MODIFIED_JULIAN_DAY] -* static TemporalField {java11-javadoc}/java.base/java/time/temporal/JulianFields.html#RATA_DIE[RATA_DIE] -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Temporal]] -==== Temporal -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* Temporal {java11-javadoc}/java.base/java/time/temporal/Temporal.html#minus(java.time.temporal.TemporalAmount)[minus](TemporalAmount) -* Temporal {java11-javadoc}/java.base/java/time/temporal/Temporal.html#minus(long,java.time.temporal.TemporalUnit)[minus](long, TemporalUnit) -* Temporal {java11-javadoc}/java.base/java/time/temporal/Temporal.html#plus(java.time.temporal.TemporalAmount)[plus](TemporalAmount) -* Temporal {java11-javadoc}/java.base/java/time/temporal/Temporal.html#plus(long,java.time.temporal.TemporalUnit)[plus](long, TemporalUnit) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* Temporal {java11-javadoc}/java.base/java/time/temporal/Temporal.html#with(java.time.temporal.TemporalAdjuster)[with](TemporalAdjuster) -* Temporal {java11-javadoc}/java.base/java/time/temporal/Temporal.html#with(java.time.temporal.TemporalField,long)[with](TemporalField, long) - - -[[painless-api-reference-shared-TemporalAccessor]] -==== TemporalAccessor -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - 
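The TemporalAccessor operations above are commonly paired with the TemporalQueries factories listed further below. A small illustrative sketch in plain Java; `dateOrNull` is a hypothetical helper name used only for this example.

[source,java]
----
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalQueries;

public class QuerySketch {
    // Extracts the date component from any TemporalAccessor that carries one;
    // returns null when no date part is present.
    static LocalDate dateOrNull(TemporalAccessor temporal) {
        return temporal.query(TemporalQueries.localDate());
    }

    public static void main(String[] args) {
        System.out.println(dateOrNull(LocalDateTime.of(2024, 5, 17, 9, 30))); // 2024-05-17
        System.out.println(dateOrNull(ZonedDateTime.now(ZoneId.of("UTC"))));  // today's date
        System.out.println(dateOrNull(LocalTime.NOON));                       // null: time only
    }
}
----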
- -[[painless-api-reference-shared-TemporalAdjuster]] -==== TemporalAdjuster -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAdjuster.html#adjustInto(java.time.temporal.Temporal)[adjustInto](Temporal) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-TemporalAdjusters]] -==== TemporalAdjusters -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#dayOfWeekInMonth(int,java.time.DayOfWeek)[dayOfWeekInMonth](int, DayOfWeek) -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#firstDayOfMonth()[firstDayOfMonth]() -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#firstDayOfNextMonth()[firstDayOfNextMonth]() -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#firstDayOfNextYear()[firstDayOfNextYear]() -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#firstDayOfYear()[firstDayOfYear]() -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#firstInMonth(java.time.DayOfWeek)[firstInMonth](DayOfWeek) -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#lastDayOfMonth()[lastDayOfMonth]() -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#lastDayOfYear()[lastDayOfYear]() -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#lastInMonth(java.time.DayOfWeek)[lastInMonth](DayOfWeek) -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#next(java.time.DayOfWeek)[next](DayOfWeek) -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#nextOrSame(java.time.DayOfWeek)[nextOrSame](DayOfWeek) -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#ofDateAdjuster(java.util.function.UnaryOperator)[ofDateAdjuster](UnaryOperator) -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#previous(java.time.DayOfWeek)[previous](DayOfWeek) -* static TemporalAdjuster {java11-javadoc}/java.base/java/time/temporal/TemporalAdjusters.html#previousOrSame(java.time.DayOfWeek)[previousOrSame](DayOfWeek) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-TemporalAmount]] -==== TemporalAmount -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#addTo(java.time.temporal.Temporal)[addTo](Temporal) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#get(java.time.temporal.TemporalUnit)[get](TemporalUnit) -* List {java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#getUnits()[getUnits]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Temporal 
{java11-javadoc}/java.base/java/time/temporal/TemporalAmount.html#subtractFrom(java.time.temporal.Temporal)[subtractFrom](Temporal) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-TemporalField]] -==== TemporalField -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#adjustInto(java.time.temporal.Temporal,long)[adjustInto](Temporal, long) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* TemporalUnit {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#getBaseUnit()[getBaseUnit]() -* null {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#getDisplayName(java.util.Locale)[getDisplayName](Locale) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#getFrom(java.time.temporal.TemporalAccessor)[getFrom](TemporalAccessor) -* TemporalUnit {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#getRangeUnit()[getRangeUnit]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#isDateBased()[isDateBased]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#isSupportedBy(java.time.temporal.TemporalAccessor)[isSupportedBy](TemporalAccessor) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#isTimeBased()[isTimeBased]() -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#range()[range]() -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#rangeRefinedBy(java.time.temporal.TemporalAccessor)[rangeRefinedBy](TemporalAccessor) -* TemporalAccessor {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#resolve(java.util.Map,java.time.temporal.TemporalAccessor,java.time.format.ResolverStyle)[resolve](Map, TemporalAccessor, ResolverStyle) -* null {java11-javadoc}/java.base/java/time/temporal/TemporalField.html#toString()[toString]() - - -[[painless-api-reference-shared-TemporalQueries]] -==== TemporalQueries -* static TemporalQuery {java11-javadoc}/java.base/java/time/temporal/TemporalQueries.html#chronology()[chronology]() -* static TemporalQuery {java11-javadoc}/java.base/java/time/temporal/TemporalQueries.html#localDate()[localDate]() -* static TemporalQuery {java11-javadoc}/java.base/java/time/temporal/TemporalQueries.html#localTime()[localTime]() -* static TemporalQuery {java11-javadoc}/java.base/java/time/temporal/TemporalQueries.html#offset()[offset]() -* static TemporalQuery {java11-javadoc}/java.base/java/time/temporal/TemporalQueries.html#precision()[precision]() -* static TemporalQuery {java11-javadoc}/java.base/java/time/temporal/TemporalQueries.html#zone()[zone]() -* static TemporalQuery {java11-javadoc}/java.base/java/time/temporal/TemporalQueries.html#zoneId()[zoneId]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-TemporalQuery]] -==== TemporalQuery -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* def 
{java11-javadoc}/java.base/java/time/temporal/TemporalQuery.html#queryFrom(java.time.temporal.TemporalAccessor)[queryFrom](TemporalAccessor) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-TemporalUnit]] -==== TemporalUnit -* Temporal {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#addTo(java.time.temporal.Temporal,long)[addTo](Temporal, long) -* long {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#between(java.time.temporal.Temporal,java.time.temporal.Temporal)[between](Temporal, Temporal) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* Duration {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#getDuration()[getDuration]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#isDateBased()[isDateBased]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#isDurationEstimated()[isDurationEstimated]() -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#isSupportedBy(java.time.temporal.Temporal)[isSupportedBy](Temporal) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#isTimeBased()[isTimeBased]() -* null {java11-javadoc}/java.base/java/time/temporal/TemporalUnit.html#toString()[toString]() - - -[[painless-api-reference-shared-UnsupportedTemporalTypeException]] -==== UnsupportedTemporalTypeException -* {java11-javadoc}/java.base/java/time/temporal/UnsupportedTemporalTypeException.html#(java.lang.String)[UnsupportedTemporalTypeException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ValueRange]] -==== ValueRange -* static ValueRange {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#of(long,long)[of](long, long) -* static ValueRange {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#of(long,long,long)[of](long, long, long) -* static ValueRange {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#of(long,long,long,long)[of](long, long, long, long) -* int {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#checkValidIntValue(long,java.time.temporal.TemporalField)[checkValidIntValue](long, TemporalField) -* long {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#checkValidValue(long,java.time.temporal.TemporalField)[checkValidValue](long, TemporalField) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#getLargestMinimum()[getLargestMinimum]() -* long {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#getMaximum()[getMaximum]() -* long {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#getMinimum()[getMinimum]() -* long {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#getSmallestMaximum()[getSmallestMaximum]() 
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#isFixed()[isFixed]() -* boolean {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#isIntValue()[isIntValue]() -* boolean {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#isValidIntValue(long)[isValidIntValue](long) -* boolean {java11-javadoc}/java.base/java/time/temporal/ValueRange.html#isValidValue(long)[isValidValue](long) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-WeekFields]] -==== WeekFields -* static WeekFields {java11-javadoc}/java.base/java/time/temporal/WeekFields.html#ISO[ISO] -* static WeekFields {java11-javadoc}/java.base/java/time/temporal/WeekFields.html#SUNDAY_START[SUNDAY_START] -* static TemporalUnit {java11-javadoc}/java.base/java/time/temporal/WeekFields.html#WEEK_BASED_YEARS[WEEK_BASED_YEARS] -* static WeekFields {java11-javadoc}/java.base/java/time/temporal/WeekFields.html#of(java.util.Locale)[of](Locale) -* static WeekFields {java11-javadoc}/java.base/java/time/temporal/WeekFields.html#of(java.time.DayOfWeek,int)[of](DayOfWeek, int) -* TemporalField {java11-javadoc}/java.base/java/time/temporal/WeekFields.html#dayOfWeek()[dayOfWeek]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* DayOfWeek {java11-javadoc}/java.base/java/time/temporal/WeekFields.html#getFirstDayOfWeek()[getFirstDayOfWeek]() -* int {java11-javadoc}/java.base/java/time/temporal/WeekFields.html#getMinimalDaysInFirstWeek()[getMinimalDaysInFirstWeek]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* TemporalField {java11-javadoc}/java.base/java/time/temporal/WeekFields.html#weekBasedYear()[weekBasedYear]() -* TemporalField {java11-javadoc}/java.base/java/time/temporal/WeekFields.html#weekOfMonth()[weekOfMonth]() -* TemporalField {java11-javadoc}/java.base/java/time/temporal/WeekFields.html#weekOfWeekBasedYear()[weekOfWeekBasedYear]() -* TemporalField {java11-javadoc}/java.base/java/time/temporal/WeekFields.html#weekOfYear()[weekOfYear]() - - -[role="exclude",id="painless-api-reference-shared-java-time-zone"] -=== Shared API for package java.time.zone -See the <> for a high-level overview of all packages and classes. 
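For the zone-rules classes catalogued below, a brief plain-Java sketch; the Europe/Paris zone and the 2024 spring-forward date are arbitrary illustrations.

[source,java]
----
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.zone.ZoneOffsetTransition;
import java.time.zone.ZoneRules;

public class ZoneRulesSketch {
    public static void main(String[] args) {
        ZoneRules rules = ZoneId.of("Europe/Paris").getRules();

        Instant summer = Instant.parse("2024-07-01T12:00:00Z");
        System.out.println(rules.getOffset(summer));          // +02:00 while DST is in effect
        System.out.println(rules.getDaylightSavings(summer)); // PT1H

        // 02:30 on 2024-03-31 falls inside the spring-forward gap for this zone.
        LocalDateTime gap = LocalDateTime.of(2024, 3, 31, 2, 30);
        ZoneOffsetTransition transition = rules.getTransition(gap);
        System.out.println(rules.getValidOffsets(gap)); // empty list: no valid offset in a gap
        System.out.println(transition.isGap() + " " + transition.getOffsetBefore()
            + " -> " + transition.getOffsetAfter());
    }
}
----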
-
-[[painless-api-reference-shared-ZoneOffsetTransition]]
-==== ZoneOffsetTransition
-* static ZoneOffsetTransition {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransition.html#of(java.time.LocalDateTime,java.time.ZoneOffset,java.time.ZoneOffset)[of](LocalDateTime, ZoneOffset, ZoneOffset)
-* int {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransition.html#compareTo(java.time.zone.ZoneOffsetTransition)[compareTo](ZoneOffsetTransition)
-* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object)
-* LocalDateTime {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransition.html#getDateTimeAfter()[getDateTimeAfter]()
-* LocalDateTime {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransition.html#getDateTimeBefore()[getDateTimeBefore]()
-* Duration {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransition.html#getDuration()[getDuration]()
-* Instant {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransition.html#getInstant()[getInstant]()
-* ZoneOffset {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransition.html#getOffsetAfter()[getOffsetAfter]()
-* ZoneOffset {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransition.html#getOffsetBefore()[getOffsetBefore]()
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* boolean {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransition.html#isGap()[isGap]()
-* boolean {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransition.html#isOverlap()[isOverlap]()
-* boolean {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransition.html#isValidOffset(java.time.ZoneOffset)[isValidOffset](ZoneOffset)
-* long {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransition.html#toEpochSecond()[toEpochSecond]()
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[[painless-api-reference-shared-ZoneOffsetTransitionRule]]
-==== ZoneOffsetTransitionRule
-* static ZoneOffsetTransitionRule {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule.html#of(java.time.Month,int,java.time.DayOfWeek,java.time.LocalTime,boolean,java.time.zone.ZoneOffsetTransitionRule$TimeDefinition,java.time.ZoneOffset,java.time.ZoneOffset,java.time.ZoneOffset)[of](Month, int, DayOfWeek, LocalTime, boolean, ZoneOffsetTransitionRule.TimeDefinition, ZoneOffset, ZoneOffset, ZoneOffset)
-* ZoneOffsetTransition {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule.html#createTransition(int)[createTransition](int)
-* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object)
-* int {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule.html#getDayOfMonthIndicator()[getDayOfMonthIndicator]()
-* DayOfWeek {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule.html#getDayOfWeek()[getDayOfWeek]()
-* LocalTime {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule.html#getLocalTime()[getLocalTime]()
-* Month {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule.html#getMonth()[getMonth]()
-* ZoneOffset {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule.html#getOffsetAfter()[getOffsetAfter]()
-* ZoneOffset {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule.html#getOffsetBefore()[getOffsetBefore]()
-* ZoneOffset {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule.html#getStandardOffset()[getStandardOffset]()
-* ZoneOffsetTransitionRule.TimeDefinition {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule.html#getTimeDefinition()[getTimeDefinition]()
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* boolean {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule.html#isMidnightEndOfDay()[isMidnightEndOfDay]()
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[[painless-api-reference-shared-ZoneOffsetTransitionRule-TimeDefinition]]
-==== ZoneOffsetTransitionRule.TimeDefinition
-* static ZoneOffsetTransitionRule.TimeDefinition {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule$TimeDefinition.html#STANDARD[STANDARD]
-* static ZoneOffsetTransitionRule.TimeDefinition {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule$TimeDefinition.html#UTC[UTC]
-* static ZoneOffsetTransitionRule.TimeDefinition {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule$TimeDefinition.html#WALL[WALL]
-* static ZoneOffsetTransitionRule.TimeDefinition {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule$TimeDefinition.html#valueOf(java.lang.String)[valueOf](null)
-* static ZoneOffsetTransitionRule.TimeDefinition[] {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule$TimeDefinition.html#values()[values]()
-* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum)
-* LocalDateTime {java11-javadoc}/java.base/java/time/zone/ZoneOffsetTransitionRule$TimeDefinition.html#createDateTime(java.time.LocalDateTime,java.time.ZoneOffset,java.time.ZoneOffset)[createDateTime](LocalDateTime, ZoneOffset, ZoneOffset)
-* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object)
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]()
-* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]()
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[[painless-api-reference-shared-ZoneRules]]
-==== ZoneRules
-* static ZoneRules {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#of(java.time.ZoneOffset)[of](ZoneOffset)
-* static ZoneRules {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#of(java.time.ZoneOffset,java.time.ZoneOffset,java.util.List,java.util.List,java.util.List)[of](ZoneOffset, ZoneOffset, List, List, List)
-* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object)
-* Duration {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#getDaylightSavings(java.time.Instant)[getDaylightSavings](Instant)
-* ZoneOffset {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#getOffset(java.time.Instant)[getOffset](Instant)
-* ZoneOffset {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#getStandardOffset(java.time.Instant)[getStandardOffset](Instant)
-* ZoneOffsetTransition {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#getTransition(java.time.LocalDateTime)[getTransition](LocalDateTime)
-* List {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#getTransitionRules()[getTransitionRules]()
-* List {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#getTransitions()[getTransitions]()
-* List {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#getValidOffsets(java.time.LocalDateTime)[getValidOffsets](LocalDateTime)
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* boolean {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#isDaylightSavings(java.time.Instant)[isDaylightSavings](Instant)
-* boolean {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#isFixedOffset()[isFixedOffset]()
-* boolean {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#isValidOffset(java.time.LocalDateTime,java.time.ZoneOffset)[isValidOffset](LocalDateTime, ZoneOffset)
-* ZoneOffsetTransition {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#nextTransition(java.time.Instant)[nextTransition](Instant)
-* ZoneOffsetTransition {java11-javadoc}/java.base/java/time/zone/ZoneRules.html#previousTransition(java.time.Instant)[previousTransition](Instant)
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[[painless-api-reference-shared-ZoneRulesException]]
-==== ZoneRulesException
-* {java11-javadoc}/java.base/java/time/zone/ZoneRulesException.html#(java.lang.String)[ZoneRulesException](null)
-* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object)
-* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]()
-* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]()
-* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]()
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[[painless-api-reference-shared-ZoneRulesProvider]]
-==== ZoneRulesProvider
-* static Set {java11-javadoc}/java.base/java/time/zone/ZoneRulesProvider.html#getAvailableZoneIds()[getAvailableZoneIds]()
-* static ZoneRules {java11-javadoc}/java.base/java/time/zone/ZoneRulesProvider.html#getRules(java.lang.String,boolean)[getRules](null, boolean)
-* static NavigableMap {java11-javadoc}/java.base/java/time/zone/ZoneRulesProvider.html#getVersions(java.lang.String)[getVersions](null)
-* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object)
-* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]()
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]()
-
-
-[role="exclude",id="painless-api-reference-shared-java-util"]
-=== Shared API for package java.util
-See the <> for a high-level overview of all packages and classes.
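The `java.util` listings that follow cover the collection, comparator, and codec members whitelisted for scripts. As a hedged sketch (plain Java; the class name `UtilExample` and the sample values are invented for illustration), a few of the members listed below, `ArrayList`, `Collections.unmodifiableList`, `Comparator.naturalOrder`, and the `Base64` encoder and decoder, can be combined like this:

[source,java]
----
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

public class UtilExample {
    public static void main(String[] args) {
        // Sort a list with a Comparator, then expose it read-only.
        List<String> names = new ArrayList<>(List.of("kibana", "beats", "elasticsearch"));
        names.sort(Comparator.naturalOrder());
        List<String> readOnly = Collections.unmodifiableList(names);
        System.out.println(readOnly);

        // Round-trip a string through the Base64 encoder and decoder.
        String encoded = Base64.getEncoder()
            .encodeToString("painless".getBytes(StandardCharsets.UTF_8));
        byte[] decoded = Base64.getDecoder().decode(encoded);
        System.out.println(encoded + " -> " + new String(decoded, StandardCharsets.UTF_8));
    }
}
----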
- -[[painless-api-reference-shared-AbstractCollection]] -==== AbstractCollection -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-AbstractList]] -==== AbstractList -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* 
def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* def {java11-javadoc}/java.base/java/util/List.html#get(int)[get](int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-AbstractMap]] -==== AbstractMap -* void {java11-javadoc}/java.base/java/util/Map.html#clear()[clear]() -* List collect(BiFunction) -* def collect(Collection, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#compute(java.lang.Object,java.util.function.BiFunction)[compute](def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfAbsent(java.lang.Object,java.util.function.Function)[computeIfAbsent](def, Function) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfPresent(java.lang.Object,java.util.function.BiFunction)[computeIfPresent](def, BiFunction) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsKey(java.lang.Object)[containsKey](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsValue(java.lang.Object)[containsValue](def) -* int count(BiPredicate) -* def each(BiConsumer) -* Set {java11-javadoc}/java.base/java/util/Map.html#entrySet()[entrySet]() -* boolean 
{java11-javadoc}/java.base/java/util/Map.html#equals(java.lang.Object)[equals](Object) -* boolean every(BiPredicate) -* Map.Entry find(BiPredicate) -* Map findAll(BiPredicate) -* def findResult(BiFunction) -* def findResult(def, BiFunction) -* List findResults(BiFunction) -* void {java11-javadoc}/java.base/java/util/Map.html#forEach(java.util.function.BiConsumer)[forEach](BiConsumer) -* def {java11-javadoc}/java.base/java/util/Map.html#get(java.lang.Object)[get](def) -* Object getByPath(null) -* Object getByPath(null, Object) -* def {java11-javadoc}/java.base/java/util/Map.html#getOrDefault(java.lang.Object,java.lang.Object)[getOrDefault](def, def) -* Map groupBy(BiFunction) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#isEmpty()[isEmpty]() -* Set {java11-javadoc}/java.base/java/util/Map.html#keySet()[keySet]() -* def {java11-javadoc}/java.base/java/util/Map.html#merge(java.lang.Object,java.lang.Object,java.util.function.BiFunction)[merge](def, def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#put(java.lang.Object,java.lang.Object)[put](def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#putAll(java.util.Map)[putAll](Map) -* def {java11-javadoc}/java.base/java/util/Map.html#putIfAbsent(java.lang.Object,java.lang.Object)[putIfAbsent](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object,java.lang.Object)[remove](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object)[replace](def, def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object,java.lang.Object)[replace](def, def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#replaceAll(java.util.function.BiFunction)[replaceAll](BiFunction) -* int {java11-javadoc}/java.base/java/util/Map.html#size()[size]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Collection {java11-javadoc}/java.base/java/util/Map.html#values()[values]() - - -[[painless-api-reference-shared-AbstractMap-SimpleEntry]] -==== AbstractMap.SimpleEntry -* {java11-javadoc}/java.base/java/util/AbstractMap$SimpleEntry.html#(java.util.Map$Entry)[AbstractMap.SimpleEntry](Map.Entry) -* {java11-javadoc}/java.base/java/util/AbstractMap$SimpleEntry.html#(java.lang.Object,java.lang.Object)[AbstractMap.SimpleEntry](def, def) -* boolean {java11-javadoc}/java.base/java/util/Map$Entry.html#equals(java.lang.Object)[equals](Object) -* def {java11-javadoc}/java.base/java/util/Map$Entry.html#getKey()[getKey]() -* def {java11-javadoc}/java.base/java/util/Map$Entry.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/util/Map$Entry.html#hashCode()[hashCode]() -* def {java11-javadoc}/java.base/java/util/Map$Entry.html#setValue(java.lang.Object)[setValue](def) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-AbstractMap-SimpleImmutableEntry]] -==== AbstractMap.SimpleImmutableEntry -* {java11-javadoc}/java.base/java/util/AbstractMap$SimpleImmutableEntry.html#(java.util.Map$Entry)[AbstractMap.SimpleImmutableEntry](Map.Entry) -* {java11-javadoc}/java.base/java/util/AbstractMap$SimpleImmutableEntry.html#(java.lang.Object,java.lang.Object)[AbstractMap.SimpleImmutableEntry](def, def) -* boolean 
{java11-javadoc}/java.base/java/util/Map$Entry.html#equals(java.lang.Object)[equals](Object) -* def {java11-javadoc}/java.base/java/util/Map$Entry.html#getKey()[getKey]() -* def {java11-javadoc}/java.base/java/util/Map$Entry.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/util/Map$Entry.html#hashCode()[hashCode]() -* def {java11-javadoc}/java.base/java/util/Map$Entry.html#setValue(java.lang.Object)[setValue](def) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-AbstractQueue]] -==== AbstractQueue -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* def {java11-javadoc}/java.base/java/util/Queue.html#element()[element]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* boolean {java11-javadoc}/java.base/java/util/Queue.html#offer(java.lang.Object)[offer](def) -* def {java11-javadoc}/java.base/java/util/Queue.html#peek()[peek]() -* def {java11-javadoc}/java.base/java/util/Queue.html#poll()[poll]() -* def {java11-javadoc}/java.base/java/util/Queue.html#remove()[remove]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-AbstractSequentialList]] -==== AbstractSequentialList -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void 
{java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* def {java11-javadoc}/java.base/java/util/List.html#get(int)[get](int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-AbstractSet]] -==== 
AbstractSet -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/Set.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/Set.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* boolean {java11-javadoc}/java.base/java/util/Set.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ArrayDeque]] -==== ArrayDeque -* {java11-javadoc}/java.base/java/util/ArrayDeque.html#()[ArrayDeque]() -* {java11-javadoc}/java.base/java/util/ArrayDeque.html#(java.util.Collection)[ArrayDeque](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* void {java11-javadoc}/java.base/java/util/Deque.html#addFirst(java.lang.Object)[addFirst](def) -* void {java11-javadoc}/java.base/java/util/Deque.html#addLast(java.lang.Object)[addLast](def) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* ArrayDeque {java11-javadoc}/java.base/java/util/ArrayDeque.html#clone()[clone]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean 
{java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* Iterator {java11-javadoc}/java.base/java/util/Deque.html#descendingIterator()[descendingIterator]() -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* def {java11-javadoc}/java.base/java/util/Queue.html#element()[element]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* def {java11-javadoc}/java.base/java/util/Deque.html#getFirst()[getFirst]() -* def {java11-javadoc}/java.base/java/util/Deque.html#getLast()[getLast]() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* boolean {java11-javadoc}/java.base/java/util/Queue.html#offer(java.lang.Object)[offer](def) -* boolean {java11-javadoc}/java.base/java/util/Deque.html#offerFirst(java.lang.Object)[offerFirst](def) -* boolean {java11-javadoc}/java.base/java/util/Deque.html#offerLast(java.lang.Object)[offerLast](def) -* def {java11-javadoc}/java.base/java/util/Queue.html#peek()[peek]() -* def {java11-javadoc}/java.base/java/util/Deque.html#peekFirst()[peekFirst]() -* def {java11-javadoc}/java.base/java/util/Deque.html#peekLast()[peekLast]() -* def {java11-javadoc}/java.base/java/util/Queue.html#poll()[poll]() -* def {java11-javadoc}/java.base/java/util/Deque.html#pollFirst()[pollFirst]() -* def {java11-javadoc}/java.base/java/util/Deque.html#pollLast()[pollLast]() -* def {java11-javadoc}/java.base/java/util/Deque.html#pop()[pop]() -* void {java11-javadoc}/java.base/java/util/Deque.html#push(java.lang.Object)[push](def) -* def {java11-javadoc}/java.base/java/util/Queue.html#remove()[remove]() -* boolean {java11-javadoc}/java.base/java/util/Deque.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* def {java11-javadoc}/java.base/java/util/Deque.html#removeFirst()[removeFirst]() -* boolean {java11-javadoc}/java.base/java/util/Deque.html#removeFirstOccurrence(java.lang.Object)[removeFirstOccurrence](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* def {java11-javadoc}/java.base/java/util/Deque.html#removeLast()[removeLast]() -* boolean {java11-javadoc}/java.base/java/util/Deque.html#removeLastOccurrence(java.lang.Object)[removeLastOccurrence](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] 
{java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ArrayList]] -==== ArrayList -* {java11-javadoc}/java.base/java/util/ArrayList.html#()[ArrayList]() -* {java11-javadoc}/java.base/java/util/ArrayList.html#(java.util.Collection)[ArrayList](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* def {java11-javadoc}/java.base/java/util/ArrayList.html#clone()[clone]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* def {java11-javadoc}/java.base/java/util/List.html#get(int)[get](int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator 
{java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* void {java11-javadoc}/java.base/java/util/ArrayList.html#trimToSize()[trimToSize]() - - -[[painless-api-reference-shared-Arrays]] -==== Arrays -* static List {java11-javadoc}/java.base/java/util/Arrays.html#asList(java.lang.Object%5B%5D)[asList](Object[]) -* static boolean {java11-javadoc}/java.base/java/util/Arrays.html#deepEquals(java.lang.Object%5B%5D,java.lang.Object%5B%5D)[deepEquals](Object[], Object[]) -* static int {java11-javadoc}/java.base/java/util/Arrays.html#deepHashCode(java.lang.Object%5B%5D)[deepHashCode](Object[]) -* static null {java11-javadoc}/java.base/java/util/Arrays.html#deepToString(java.lang.Object%5B%5D)[deepToString](Object[]) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Base64]] -==== Base64 -* static Base64.Decoder {java11-javadoc}/java.base/java/util/Base64.html#getDecoder()[getDecoder]() -* static Base64.Encoder {java11-javadoc}/java.base/java/util/Base64.html#getEncoder()[getEncoder]() -* static Base64.Decoder {java11-javadoc}/java.base/java/util/Base64.html#getMimeDecoder()[getMimeDecoder]() -* static Base64.Encoder {java11-javadoc}/java.base/java/util/Base64.html#getMimeEncoder()[getMimeEncoder]() -* static Base64.Encoder {java11-javadoc}/java.base/java/util/Base64.html#getMimeEncoder(int,byte%5B%5D)[getMimeEncoder](int, byte[]) -* static Base64.Decoder {java11-javadoc}/java.base/java/util/Base64.html#getUrlDecoder()[getUrlDecoder]() -* static Base64.Encoder {java11-javadoc}/java.base/java/util/Base64.html#getUrlEncoder()[getUrlEncoder]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Base64-Decoder]] -==== Base64.Decoder -* byte[] {java11-javadoc}/java.base/java/util/Base64$Decoder.html#decode(java.lang.String)[decode](null) -* int {java11-javadoc}/java.base/java/util/Base64$Decoder.html#decode(byte%5B%5D,byte%5B%5D)[decode](byte[], byte[]) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Base64-Encoder]] -==== Base64.Encoder -* int {java11-javadoc}/java.base/java/util/Base64$Encoder.html#encode(byte%5B%5D,byte%5B%5D)[encode](byte[], byte[]) -* null {java11-javadoc}/java.base/java/util/Base64$Encoder.html#encodeToString(byte%5B%5D)[encodeToString](byte[]) -* boolean 
{java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Base64.Encoder {java11-javadoc}/java.base/java/util/Base64$Encoder.html#withoutPadding()[withoutPadding]() - - -[[painless-api-reference-shared-BitSet]] -==== BitSet -* static BitSet {java11-javadoc}/java.base/java/util/BitSet.html#valueOf(long%5B%5D)[valueOf](long[]) -* {java11-javadoc}/java.base/java/util/BitSet.html#()[BitSet]() -* {java11-javadoc}/java.base/java/util/BitSet.html#(int)[BitSet](int) -* void {java11-javadoc}/java.base/java/util/BitSet.html#and(java.util.BitSet)[and](BitSet) -* void {java11-javadoc}/java.base/java/util/BitSet.html#andNot(java.util.BitSet)[andNot](BitSet) -* int {java11-javadoc}/java.base/java/util/BitSet.html#cardinality()[cardinality]() -* void {java11-javadoc}/java.base/java/util/BitSet.html#clear()[clear]() -* void {java11-javadoc}/java.base/java/util/BitSet.html#clear(int)[clear](int) -* void {java11-javadoc}/java.base/java/util/BitSet.html#clear(int,int)[clear](int, int) -* def {java11-javadoc}/java.base/java/util/BitSet.html#clone()[clone]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* void {java11-javadoc}/java.base/java/util/BitSet.html#flip(int)[flip](int) -* void {java11-javadoc}/java.base/java/util/BitSet.html#flip(int,int)[flip](int, int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/BitSet.html#intersects(java.util.BitSet)[intersects](BitSet) -* boolean {java11-javadoc}/java.base/java/util/BitSet.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/util/BitSet.html#length()[length]() -* int {java11-javadoc}/java.base/java/util/BitSet.html#nextClearBit(int)[nextClearBit](int) -* int {java11-javadoc}/java.base/java/util/BitSet.html#nextSetBit(int)[nextSetBit](int) -* void {java11-javadoc}/java.base/java/util/BitSet.html#or(java.util.BitSet)[or](BitSet) -* int {java11-javadoc}/java.base/java/util/BitSet.html#previousClearBit(int)[previousClearBit](int) -* int {java11-javadoc}/java.base/java/util/BitSet.html#previousSetBit(int)[previousSetBit](int) -* void {java11-javadoc}/java.base/java/util/BitSet.html#set(int)[set](int) -* void {java11-javadoc}/java.base/java/util/BitSet.html#set(int,int)[set](int, int) -* void {java11-javadoc}/java.base/java/util/BitSet.html#set(int,int,boolean)[set](int, int, boolean) -* int {java11-javadoc}/java.base/java/util/BitSet.html#size()[size]() -* byte[] {java11-javadoc}/java.base/java/util/BitSet.html#toByteArray()[toByteArray]() -* long[] {java11-javadoc}/java.base/java/util/BitSet.html#toLongArray()[toLongArray]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* void {java11-javadoc}/java.base/java/util/BitSet.html#xor(java.util.BitSet)[xor](BitSet) - - -[[painless-api-reference-shared-Calendar]] -==== Calendar -* static int {java11-javadoc}/java.base/java/util/Calendar.html#ALL_STYLES[ALL_STYLES] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#AM[AM] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#AM_PM[AM_PM] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#APRIL[APRIL] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#AUGUST[AUGUST] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#DATE[DATE] -* static 
int {java11-javadoc}/java.base/java/util/Calendar.html#DAY_OF_MONTH[DAY_OF_MONTH] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#DAY_OF_WEEK[DAY_OF_WEEK] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#DAY_OF_WEEK_IN_MONTH[DAY_OF_WEEK_IN_MONTH] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#DAY_OF_YEAR[DAY_OF_YEAR] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#DECEMBER[DECEMBER] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#DST_OFFSET[DST_OFFSET] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#ERA[ERA] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#FEBRUARY[FEBRUARY] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#FIELD_COUNT[FIELD_COUNT] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#FRIDAY[FRIDAY] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#HOUR[HOUR] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#HOUR_OF_DAY[HOUR_OF_DAY] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#JANUARY[JANUARY] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#JULY[JULY] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#JUNE[JUNE] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#LONG[LONG] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#LONG_FORMAT[LONG_FORMAT] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#LONG_STANDALONE[LONG_STANDALONE] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#MARCH[MARCH] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#MAY[MAY] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#MILLISECOND[MILLISECOND] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#MINUTE[MINUTE] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#MONDAY[MONDAY] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#MONTH[MONTH] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#NARROW_FORMAT[NARROW_FORMAT] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#NARROW_STANDALONE[NARROW_STANDALONE] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#NOVEMBER[NOVEMBER] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#OCTOBER[OCTOBER] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#PM[PM] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#SATURDAY[SATURDAY] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#SECOND[SECOND] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#SEPTEMBER[SEPTEMBER] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#SHORT[SHORT] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#SHORT_FORMAT[SHORT_FORMAT] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#SHORT_STANDALONE[SHORT_STANDALONE] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#SUNDAY[SUNDAY] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#THURSDAY[THURSDAY] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#TUESDAY[TUESDAY] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#UNDECIMBER[UNDECIMBER] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#WEDNESDAY[WEDNESDAY] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#WEEK_OF_MONTH[WEEK_OF_MONTH] -* static int 
{java11-javadoc}/java.base/java/util/Calendar.html#WEEK_OF_YEAR[WEEK_OF_YEAR] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#YEAR[YEAR] -* static int {java11-javadoc}/java.base/java/util/Calendar.html#ZONE_OFFSET[ZONE_OFFSET] -* static Set {java11-javadoc}/java.base/java/util/Calendar.html#getAvailableCalendarTypes()[getAvailableCalendarTypes]() -* static Locale[] {java11-javadoc}/java.base/java/util/Calendar.html#getAvailableLocales()[getAvailableLocales]() -* static Calendar {java11-javadoc}/java.base/java/util/Calendar.html#getInstance()[getInstance]() -* static Calendar {java11-javadoc}/java.base/java/util/Calendar.html#getInstance(java.util.TimeZone)[getInstance](TimeZone) -* static Calendar {java11-javadoc}/java.base/java/util/Calendar.html#getInstance(java.util.TimeZone,java.util.Locale)[getInstance](TimeZone, Locale) -* void {java11-javadoc}/java.base/java/util/Calendar.html#add(int,int)[add](int, int) -* boolean {java11-javadoc}/java.base/java/util/Calendar.html#after(java.lang.Object)[after](Object) -* boolean {java11-javadoc}/java.base/java/util/Calendar.html#before(java.lang.Object)[before](Object) -* void {java11-javadoc}/java.base/java/util/Calendar.html#clear()[clear]() -* void {java11-javadoc}/java.base/java/util/Calendar.html#clear(int)[clear](int) -* def {java11-javadoc}/java.base/java/util/Calendar.html#clone()[clone]() -* int {java11-javadoc}/java.base/java/util/Calendar.html#compareTo(java.util.Calendar)[compareTo](Calendar) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/util/Calendar.html#get(int)[get](int) -* int {java11-javadoc}/java.base/java/util/Calendar.html#getActualMaximum(int)[getActualMaximum](int) -* int {java11-javadoc}/java.base/java/util/Calendar.html#getActualMinimum(int)[getActualMinimum](int) -* null {java11-javadoc}/java.base/java/util/Calendar.html#getCalendarType()[getCalendarType]() -* null {java11-javadoc}/java.base/java/util/Calendar.html#getDisplayName(int,int,java.util.Locale)[getDisplayName](int, int, Locale) -* Map {java11-javadoc}/java.base/java/util/Calendar.html#getDisplayNames(int,int,java.util.Locale)[getDisplayNames](int, int, Locale) -* int {java11-javadoc}/java.base/java/util/Calendar.html#getFirstDayOfWeek()[getFirstDayOfWeek]() -* int {java11-javadoc}/java.base/java/util/Calendar.html#getGreatestMinimum(int)[getGreatestMinimum](int) -* int {java11-javadoc}/java.base/java/util/Calendar.html#getLeastMaximum(int)[getLeastMaximum](int) -* int {java11-javadoc}/java.base/java/util/Calendar.html#getMaximum(int)[getMaximum](int) -* int {java11-javadoc}/java.base/java/util/Calendar.html#getMinimalDaysInFirstWeek()[getMinimalDaysInFirstWeek]() -* int {java11-javadoc}/java.base/java/util/Calendar.html#getMinimum(int)[getMinimum](int) -* Date {java11-javadoc}/java.base/java/util/Calendar.html#getTime()[getTime]() -* long {java11-javadoc}/java.base/java/util/Calendar.html#getTimeInMillis()[getTimeInMillis]() -* TimeZone {java11-javadoc}/java.base/java/util/Calendar.html#getTimeZone()[getTimeZone]() -* int {java11-javadoc}/java.base/java/util/Calendar.html#getWeekYear()[getWeekYear]() -* int {java11-javadoc}/java.base/java/util/Calendar.html#getWeeksInWeekYear()[getWeeksInWeekYear]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Calendar.html#isLenient()[isLenient]() -* boolean {java11-javadoc}/java.base/java/util/Calendar.html#isSet(int)[isSet](int) -* 
boolean {java11-javadoc}/java.base/java/util/Calendar.html#isWeekDateSupported()[isWeekDateSupported]() -* void {java11-javadoc}/java.base/java/util/Calendar.html#roll(int,int)[roll](int, int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#set(int,int)[set](int, int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#set(int,int,int)[set](int, int, int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#set(int,int,int,int,int)[set](int, int, int, int, int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#set(int,int,int,int,int,int)[set](int, int, int, int, int, int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setFirstDayOfWeek(int)[setFirstDayOfWeek](int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setLenient(boolean)[setLenient](boolean) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setMinimalDaysInFirstWeek(int)[setMinimalDaysInFirstWeek](int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setTime(java.util.Date)[setTime](Date) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setTimeInMillis(long)[setTimeInMillis](long) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setTimeZone(java.util.TimeZone)[setTimeZone](TimeZone) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setWeekDate(int,int,int)[setWeekDate](int, int, int) -* Instant {java11-javadoc}/java.base/java/util/Calendar.html#toInstant()[toInstant]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Calendar-Builder]] -==== Calendar.Builder -* {java11-javadoc}/java.base/java/util/Calendar$Builder.html#()[Calendar.Builder]() -* Calendar {java11-javadoc}/java.base/java/util/Calendar$Builder.html#build()[build]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Calendar.Builder {java11-javadoc}/java.base/java/util/Calendar$Builder.html#set(int,int)[set](int, int) -* Calendar.Builder {java11-javadoc}/java.base/java/util/Calendar$Builder.html#setCalendarType(java.lang.String)[setCalendarType](null) -* Calendar.Builder {java11-javadoc}/java.base/java/util/Calendar$Builder.html#setDate(int,int,int)[setDate](int, int, int) -* Calendar.Builder {java11-javadoc}/java.base/java/util/Calendar$Builder.html#setFields(int%5B%5D)[setFields](int[]) -* Calendar.Builder {java11-javadoc}/java.base/java/util/Calendar$Builder.html#setInstant(long)[setInstant](long) -* Calendar.Builder {java11-javadoc}/java.base/java/util/Calendar$Builder.html#setLenient(boolean)[setLenient](boolean) -* Calendar.Builder {java11-javadoc}/java.base/java/util/Calendar$Builder.html#setLocale(java.util.Locale)[setLocale](Locale) -* Calendar.Builder {java11-javadoc}/java.base/java/util/Calendar$Builder.html#setTimeOfDay(int,int,int)[setTimeOfDay](int, int, int) -* Calendar.Builder {java11-javadoc}/java.base/java/util/Calendar$Builder.html#setTimeOfDay(int,int,int,int)[setTimeOfDay](int, int, int, int) -* Calendar.Builder {java11-javadoc}/java.base/java/util/Calendar$Builder.html#setTimeZone(java.util.TimeZone)[setTimeZone](TimeZone) -* Calendar.Builder {java11-javadoc}/java.base/java/util/Calendar$Builder.html#setWeekDate(int,int,int)[setWeekDate](int, int, int) -* Calendar.Builder {java11-javadoc}/java.base/java/util/Calendar$Builder.html#setWeekDefinition(int,int)[setWeekDefinition](int, int) -* null 
{java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Collection]] -==== Collection -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Collections]] -==== Collections -* static List {java11-javadoc}/java.base/java/util/Collections.html#EMPTY_LIST[EMPTY_LIST] -* static Map {java11-javadoc}/java.base/java/util/Collections.html#EMPTY_MAP[EMPTY_MAP] -* static Set {java11-javadoc}/java.base/java/util/Collections.html#EMPTY_SET[EMPTY_SET] -* static boolean {java11-javadoc}/java.base/java/util/Collections.html#addAll(java.util.Collection,java.lang.Object%5B%5D)[addAll](Collection, def[]) -* static Queue {java11-javadoc}/java.base/java/util/Collections.html#asLifoQueue(java.util.Deque)[asLifoQueue](Deque) -* static int {java11-javadoc}/java.base/java/util/Collections.html#binarySearch(java.util.List,java.lang.Object)[binarySearch](List, def) -* static int {java11-javadoc}/java.base/java/util/Collections.html#binarySearch(java.util.List,java.lang.Object,java.util.Comparator)[binarySearch](List, def, Comparator) -* static void {java11-javadoc}/java.base/java/util/Collections.html#copy(java.util.List,java.util.List)[copy](List, List) -* static boolean 
{java11-javadoc}/java.base/java/util/Collections.html#disjoint(java.util.Collection,java.util.Collection)[disjoint](Collection, Collection) -* static Enumeration {java11-javadoc}/java.base/java/util/Collections.html#emptyEnumeration()[emptyEnumeration]() -* static Iterator {java11-javadoc}/java.base/java/util/Collections.html#emptyIterator()[emptyIterator]() -* static List {java11-javadoc}/java.base/java/util/Collections.html#emptyList()[emptyList]() -* static ListIterator {java11-javadoc}/java.base/java/util/Collections.html#emptyListIterator()[emptyListIterator]() -* static Map {java11-javadoc}/java.base/java/util/Collections.html#emptyMap()[emptyMap]() -* static NavigableMap {java11-javadoc}/java.base/java/util/Collections.html#emptyNavigableMap()[emptyNavigableMap]() -* static NavigableSet {java11-javadoc}/java.base/java/util/Collections.html#emptyNavigableSet()[emptyNavigableSet]() -* static Set {java11-javadoc}/java.base/java/util/Collections.html#emptySet()[emptySet]() -* static SortedMap {java11-javadoc}/java.base/java/util/Collections.html#emptySortedMap()[emptySortedMap]() -* static SortedSet {java11-javadoc}/java.base/java/util/Collections.html#emptySortedSet()[emptySortedSet]() -* static Enumeration {java11-javadoc}/java.base/java/util/Collections.html#enumeration(java.util.Collection)[enumeration](Collection) -* static void {java11-javadoc}/java.base/java/util/Collections.html#fill(java.util.List,java.lang.Object)[fill](List, def) -* static int {java11-javadoc}/java.base/java/util/Collections.html#frequency(java.util.Collection,java.lang.Object)[frequency](Collection, def) -* static int {java11-javadoc}/java.base/java/util/Collections.html#indexOfSubList(java.util.List,java.util.List)[indexOfSubList](List, List) -* static int {java11-javadoc}/java.base/java/util/Collections.html#lastIndexOfSubList(java.util.List,java.util.List)[lastIndexOfSubList](List, List) -* static ArrayList {java11-javadoc}/java.base/java/util/Collections.html#list(java.util.Enumeration)[list](Enumeration) -* static def {java11-javadoc}/java.base/java/util/Collections.html#max(java.util.Collection)[max](Collection) -* static def {java11-javadoc}/java.base/java/util/Collections.html#max(java.util.Collection,java.util.Comparator)[max](Collection, Comparator) -* static def {java11-javadoc}/java.base/java/util/Collections.html#min(java.util.Collection)[min](Collection) -* static def {java11-javadoc}/java.base/java/util/Collections.html#min(java.util.Collection,java.util.Comparator)[min](Collection, Comparator) -* static List {java11-javadoc}/java.base/java/util/Collections.html#nCopies(int,java.lang.Object)[nCopies](int, def) -* static Set {java11-javadoc}/java.base/java/util/Collections.html#newSetFromMap(java.util.Map)[newSetFromMap](Map) -* static boolean {java11-javadoc}/java.base/java/util/Collections.html#replaceAll(java.util.List,java.lang.Object,java.lang.Object)[replaceAll](List, def, def) -* static void {java11-javadoc}/java.base/java/util/Collections.html#reverse(java.util.List)[reverse](List) -* static Comparator {java11-javadoc}/java.base/java/util/Collections.html#reverseOrder()[reverseOrder]() -* static Comparator {java11-javadoc}/java.base/java/util/Collections.html#reverseOrder(java.util.Comparator)[reverseOrder](Comparator) -* static void {java11-javadoc}/java.base/java/util/Collections.html#rotate(java.util.List,int)[rotate](List, int) -* static void {java11-javadoc}/java.base/java/util/Collections.html#shuffle(java.util.List)[shuffle](List) -* static void 
{java11-javadoc}/java.base/java/util/Collections.html#shuffle(java.util.List,java.util.Random)[shuffle](List, Random) -* static Set {java11-javadoc}/java.base/java/util/Collections.html#singleton(java.lang.Object)[singleton](def) -* static List {java11-javadoc}/java.base/java/util/Collections.html#singletonList(java.lang.Object)[singletonList](def) -* static Map {java11-javadoc}/java.base/java/util/Collections.html#singletonMap(java.lang.Object,java.lang.Object)[singletonMap](def, def) -* static void {java11-javadoc}/java.base/java/util/Collections.html#sort(java.util.List)[sort](List) -* static void {java11-javadoc}/java.base/java/util/Collections.html#sort(java.util.List,java.util.Comparator)[sort](List, Comparator) -* static void {java11-javadoc}/java.base/java/util/Collections.html#swap(java.util.List,int,int)[swap](List, int, int) -* static Collection {java11-javadoc}/java.base/java/util/Collections.html#unmodifiableCollection(java.util.Collection)[unmodifiableCollection](Collection) -* static List {java11-javadoc}/java.base/java/util/Collections.html#unmodifiableList(java.util.List)[unmodifiableList](List) -* static Map {java11-javadoc}/java.base/java/util/Collections.html#unmodifiableMap(java.util.Map)[unmodifiableMap](Map) -* static NavigableMap {java11-javadoc}/java.base/java/util/Collections.html#unmodifiableNavigableMap(java.util.NavigableMap)[unmodifiableNavigableMap](NavigableMap) -* static NavigableSet {java11-javadoc}/java.base/java/util/Collections.html#unmodifiableNavigableSet(java.util.NavigableSet)[unmodifiableNavigableSet](NavigableSet) -* static Set {java11-javadoc}/java.base/java/util/Collections.html#unmodifiableSet(java.util.Set)[unmodifiableSet](Set) -* static SortedMap {java11-javadoc}/java.base/java/util/Collections.html#unmodifiableSortedMap(java.util.SortedMap)[unmodifiableSortedMap](SortedMap) -* static SortedSet {java11-javadoc}/java.base/java/util/Collections.html#unmodifiableSortedSet(java.util.SortedSet)[unmodifiableSortedSet](SortedSet) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Comparator]] -==== Comparator -* static Comparator {java11-javadoc}/java.base/java/util/Comparator.html#comparing(java.util.function.Function)[comparing](Function) -* static Comparator {java11-javadoc}/java.base/java/util/Comparator.html#comparing(java.util.function.Function,java.util.Comparator)[comparing](Function, Comparator) -* static Comparator {java11-javadoc}/java.base/java/util/Comparator.html#comparingDouble(java.util.function.ToDoubleFunction)[comparingDouble](ToDoubleFunction) -* static Comparator {java11-javadoc}/java.base/java/util/Comparator.html#comparingInt(java.util.function.ToIntFunction)[comparingInt](ToIntFunction) -* static Comparator {java11-javadoc}/java.base/java/util/Comparator.html#comparingLong(java.util.function.ToLongFunction)[comparingLong](ToLongFunction) -* static Comparator {java11-javadoc}/java.base/java/util/Comparator.html#naturalOrder()[naturalOrder]() -* static Comparator {java11-javadoc}/java.base/java/util/Comparator.html#nullsFirst(java.util.Comparator)[nullsFirst](Comparator) -* static Comparator {java11-javadoc}/java.base/java/util/Comparator.html#nullsLast(java.util.Comparator)[nullsLast](Comparator) -* static Comparator 
{java11-javadoc}/java.base/java/util/Comparator.html#reverseOrder()[reverseOrder]() -* int {java11-javadoc}/java.base/java/util/Comparator.html#compare(java.lang.Object,java.lang.Object)[compare](def, def) -* boolean {java11-javadoc}/java.base/java/util/Comparator.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#reversed()[reversed]() -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparing(java.util.Comparator)[thenComparing](Comparator) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparing(java.util.function.Function,java.util.Comparator)[thenComparing](Function, Comparator) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparingDouble(java.util.function.ToDoubleFunction)[thenComparingDouble](ToDoubleFunction) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparingInt(java.util.function.ToIntFunction)[thenComparingInt](ToIntFunction) -* Comparator {java11-javadoc}/java.base/java/util/Comparator.html#thenComparingLong(java.util.function.ToLongFunction)[thenComparingLong](ToLongFunction) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ConcurrentModificationException]] -==== ConcurrentModificationException -* {java11-javadoc}/java.base/java/util/ConcurrentModificationException.html#()[ConcurrentModificationException]() -* {java11-javadoc}/java.base/java/util/ConcurrentModificationException.html#(java.lang.String)[ConcurrentModificationException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Currency]] -==== Currency -* static Set {java11-javadoc}/java.base/java/util/Currency.html#getAvailableCurrencies()[getAvailableCurrencies]() -* static Currency {java11-javadoc}/java.base/java/util/Currency.html#getInstance(java.lang.String)[getInstance](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/util/Currency.html#getCurrencyCode()[getCurrencyCode]() -* int {java11-javadoc}/java.base/java/util/Currency.html#getDefaultFractionDigits()[getDefaultFractionDigits]() -* null {java11-javadoc}/java.base/java/util/Currency.html#getDisplayName()[getDisplayName]() -* null {java11-javadoc}/java.base/java/util/Currency.html#getDisplayName(java.util.Locale)[getDisplayName](Locale) -* int {java11-javadoc}/java.base/java/util/Currency.html#getNumericCode()[getNumericCode]() -* null {java11-javadoc}/java.base/java/util/Currency.html#getSymbol()[getSymbol]() -* null {java11-javadoc}/java.base/java/util/Currency.html#getSymbol(java.util.Locale)[getSymbol](Locale) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Date]] 
-==== Date -* static Date {java11-javadoc}/java.base/java/util/Date.html#from(java.time.Instant)[from](Instant) -* {java11-javadoc}/java.base/java/util/Date.html#()[Date]() -* {java11-javadoc}/java.base/java/util/Date.html#(long)[Date](long) -* boolean {java11-javadoc}/java.base/java/util/Date.html#after(java.util.Date)[after](Date) -* boolean {java11-javadoc}/java.base/java/util/Date.html#before(java.util.Date)[before](Date) -* def {java11-javadoc}/java.base/java/util/Date.html#clone()[clone]() -* int {java11-javadoc}/java.base/java/util/Date.html#compareTo(java.util.Date)[compareTo](Date) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/util/Date.html#getTime()[getTime]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* void {java11-javadoc}/java.base/java/util/Date.html#setTime(long)[setTime](long) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Deque]] -==== Deque -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* void {java11-javadoc}/java.base/java/util/Deque.html#addFirst(java.lang.Object)[addFirst](def) -* void {java11-javadoc}/java.base/java/util/Deque.html#addLast(java.lang.Object)[addLast](def) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* Iterator {java11-javadoc}/java.base/java/util/Deque.html#descendingIterator()[descendingIterator]() -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* def {java11-javadoc}/java.base/java/util/Queue.html#element()[element]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* def {java11-javadoc}/java.base/java/util/Deque.html#getFirst()[getFirst]() -* def {java11-javadoc}/java.base/java/util/Deque.html#getLast()[getLast]() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* boolean {java11-javadoc}/java.base/java/util/Queue.html#offer(java.lang.Object)[offer](def) -* boolean {java11-javadoc}/java.base/java/util/Deque.html#offerFirst(java.lang.Object)[offerFirst](def) -* boolean {java11-javadoc}/java.base/java/util/Deque.html#offerLast(java.lang.Object)[offerLast](def) -* def {java11-javadoc}/java.base/java/util/Queue.html#peek()[peek]() -* def {java11-javadoc}/java.base/java/util/Deque.html#peekFirst()[peekFirst]() -* def {java11-javadoc}/java.base/java/util/Deque.html#peekLast()[peekLast]() -* def 
{java11-javadoc}/java.base/java/util/Queue.html#poll()[poll]() -* def {java11-javadoc}/java.base/java/util/Deque.html#pollFirst()[pollFirst]() -* def {java11-javadoc}/java.base/java/util/Deque.html#pollLast()[pollLast]() -* def {java11-javadoc}/java.base/java/util/Deque.html#pop()[pop]() -* void {java11-javadoc}/java.base/java/util/Deque.html#push(java.lang.Object)[push](def) -* def {java11-javadoc}/java.base/java/util/Queue.html#remove()[remove]() -* boolean {java11-javadoc}/java.base/java/util/Deque.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* def {java11-javadoc}/java.base/java/util/Deque.html#removeFirst()[removeFirst]() -* boolean {java11-javadoc}/java.base/java/util/Deque.html#removeFirstOccurrence(java.lang.Object)[removeFirstOccurrence](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* def {java11-javadoc}/java.base/java/util/Deque.html#removeLast()[removeLast]() -* boolean {java11-javadoc}/java.base/java/util/Deque.html#removeLastOccurrence(java.lang.Object)[removeLastOccurrence](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Dictionary]] -==== Dictionary -* Enumeration {java11-javadoc}/java.base/java/util/Dictionary.html#elements()[elements]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* def {java11-javadoc}/java.base/java/util/Dictionary.html#get(java.lang.Object)[get](def) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Dictionary.html#isEmpty()[isEmpty]() -* Enumeration {java11-javadoc}/java.base/java/util/Dictionary.html#keys()[keys]() -* def {java11-javadoc}/java.base/java/util/Dictionary.html#put(java.lang.Object,java.lang.Object)[put](def, def) -* def {java11-javadoc}/java.base/java/util/Dictionary.html#remove(java.lang.Object)[remove](def) -* int {java11-javadoc}/java.base/java/util/Dictionary.html#size()[size]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DoubleSummaryStatistics]] -==== DoubleSummaryStatistics -* {java11-javadoc}/java.base/java/util/DoubleSummaryStatistics.html#()[DoubleSummaryStatistics]() -* void {java11-javadoc}/java.base/java/util/function/DoubleConsumer.html#accept(double)[accept](double) -* DoubleConsumer {java11-javadoc}/java.base/java/util/function/DoubleConsumer.html#andThen(java.util.function.DoubleConsumer)[andThen](DoubleConsumer) -* void {java11-javadoc}/java.base/java/util/DoubleSummaryStatistics.html#combine(java.util.DoubleSummaryStatistics)[combine](DoubleSummaryStatistics) -* boolean 
{java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* double {java11-javadoc}/java.base/java/util/DoubleSummaryStatistics.html#getAverage()[getAverage]() -* long {java11-javadoc}/java.base/java/util/DoubleSummaryStatistics.html#getCount()[getCount]() -* double {java11-javadoc}/java.base/java/util/DoubleSummaryStatistics.html#getMax()[getMax]() -* double {java11-javadoc}/java.base/java/util/DoubleSummaryStatistics.html#getMin()[getMin]() -* double {java11-javadoc}/java.base/java/util/DoubleSummaryStatistics.html#getSum()[getSum]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DuplicateFormatFlagsException]] -==== DuplicateFormatFlagsException -* {java11-javadoc}/java.base/java/util/DuplicateFormatFlagsException.html#(java.lang.String)[DuplicateFormatFlagsException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/util/DuplicateFormatFlagsException.html#getFlags()[getFlags]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-EmptyStackException]] -==== EmptyStackException -* {java11-javadoc}/java.base/java/util/EmptyStackException.html#()[EmptyStackException]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Enumeration]] -==== Enumeration -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/util/Enumeration.html#hasMoreElements()[hasMoreElements]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* def {java11-javadoc}/java.base/java/util/Enumeration.html#nextElement()[nextElement]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-EventListener]] -==== EventListener -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-EventListenerProxy]] -==== EventListenerProxy -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* EventListener {java11-javadoc}/java.base/java/util/EventListenerProxy.html#getListener()[getListener]() -* int 
{java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-EventObject]] -==== EventObject -* {java11-javadoc}/java.base/java/util/EventObject.html#(java.lang.Object)[EventObject](Object) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* Object {java11-javadoc}/java.base/java/util/EventObject.html#getSource()[getSource]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-FormatFlagsConversionMismatchException]] -==== FormatFlagsConversionMismatchException -* {java11-javadoc}/java.base/java/util/FormatFlagsConversionMismatchException.html#(java.lang.String,char)[FormatFlagsConversionMismatchException](null, char) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* char {java11-javadoc}/java.base/java/util/FormatFlagsConversionMismatchException.html#getConversion()[getConversion]() -* null {java11-javadoc}/java.base/java/util/FormatFlagsConversionMismatchException.html#getFlags()[getFlags]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Formattable]] -==== Formattable -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* void {java11-javadoc}/java.base/java/util/Formattable.html#formatTo(java.util.Formatter,int,int,int)[formatTo](Formatter, int, int, int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-FormattableFlags]] -==== FormattableFlags -* static int {java11-javadoc}/java.base/java/util/FormattableFlags.html#ALTERNATE[ALTERNATE] -* static int {java11-javadoc}/java.base/java/util/FormattableFlags.html#LEFT_JUSTIFY[LEFT_JUSTIFY] -* static int {java11-javadoc}/java.base/java/util/FormattableFlags.html#UPPERCASE[UPPERCASE] -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Formatter]] -==== Formatter -* {java11-javadoc}/java.base/java/util/Formatter.html#()[Formatter]() -* {java11-javadoc}/java.base/java/util/Formatter.html#(java.lang.Appendable)[Formatter](Appendable) -* {java11-javadoc}/java.base/java/util/Formatter.html#(java.lang.Appendable,java.util.Locale)[Formatter](Appendable, Locale) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* Formatter {java11-javadoc}/java.base/java/util/Formatter.html#format(java.lang.String,java.lang.Object%5B%5D)[format](null, def[]) -* Formatter 
{java11-javadoc}/java.base/java/util/Formatter.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, null, def[]) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Locale {java11-javadoc}/java.base/java/util/Formatter.html#locale()[locale]() -* Appendable {java11-javadoc}/java.base/java/util/Formatter.html#out()[out]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Formatter-BigDecimalLayoutForm]] -==== Formatter.BigDecimalLayoutForm -* static Formatter.BigDecimalLayoutForm {java11-javadoc}/java.base/java/util/Formatter$BigDecimalLayoutForm.html#DECIMAL_FLOAT[DECIMAL_FLOAT] -* static Formatter.BigDecimalLayoutForm {java11-javadoc}/java.base/java/util/Formatter$BigDecimalLayoutForm.html#SCIENTIFIC[SCIENTIFIC] -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-FormatterClosedException]] -==== FormatterClosedException -* {java11-javadoc}/java.base/java/util/FormatterClosedException.html#()[FormatterClosedException]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-GregorianCalendar]] -==== GregorianCalendar -* static int {java11-javadoc}/java.base/java/util/GregorianCalendar.html#AD[AD] -* static int {java11-javadoc}/java.base/java/util/GregorianCalendar.html#BC[BC] -* static GregorianCalendar {java11-javadoc}/java.base/java/util/GregorianCalendar.html#from(java.time.ZonedDateTime)[from](ZonedDateTime) -* {java11-javadoc}/java.base/java/util/GregorianCalendar.html#()[GregorianCalendar]() -* {java11-javadoc}/java.base/java/util/GregorianCalendar.html#(java.util.TimeZone)[GregorianCalendar](TimeZone) -* {java11-javadoc}/java.base/java/util/GregorianCalendar.html#(java.util.TimeZone,java.util.Locale)[GregorianCalendar](TimeZone, Locale) -* {java11-javadoc}/java.base/java/util/GregorianCalendar.html#(int,int,int)[GregorianCalendar](int, int, int) -* {java11-javadoc}/java.base/java/util/GregorianCalendar.html#(int,int,int,int,int)[GregorianCalendar](int, int, int, int, int) -* {java11-javadoc}/java.base/java/util/GregorianCalendar.html#(int,int,int,int,int,int)[GregorianCalendar](int, int, int, int, int, int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#add(int,int)[add](int, int) -* boolean {java11-javadoc}/java.base/java/util/Calendar.html#after(java.lang.Object)[after](Object) -* boolean {java11-javadoc}/java.base/java/util/Calendar.html#before(java.lang.Object)[before](Object) -* void 
{java11-javadoc}/java.base/java/util/Calendar.html#clear()[clear]() -* void {java11-javadoc}/java.base/java/util/Calendar.html#clear(int)[clear](int) -* def {java11-javadoc}/java.base/java/util/Calendar.html#clone()[clone]() -* int {java11-javadoc}/java.base/java/util/Calendar.html#compareTo(java.util.Calendar)[compareTo](Calendar) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/util/Calendar.html#get(int)[get](int) -* int {java11-javadoc}/java.base/java/util/Calendar.html#getActualMaximum(int)[getActualMaximum](int) -* int {java11-javadoc}/java.base/java/util/Calendar.html#getActualMinimum(int)[getActualMinimum](int) -* null {java11-javadoc}/java.base/java/util/Calendar.html#getCalendarType()[getCalendarType]() -* null {java11-javadoc}/java.base/java/util/Calendar.html#getDisplayName(int,int,java.util.Locale)[getDisplayName](int, int, Locale) -* Map {java11-javadoc}/java.base/java/util/Calendar.html#getDisplayNames(int,int,java.util.Locale)[getDisplayNames](int, int, Locale) -* int {java11-javadoc}/java.base/java/util/Calendar.html#getFirstDayOfWeek()[getFirstDayOfWeek]() -* int {java11-javadoc}/java.base/java/util/Calendar.html#getGreatestMinimum(int)[getGreatestMinimum](int) -* Date {java11-javadoc}/java.base/java/util/GregorianCalendar.html#getGregorianChange()[getGregorianChange]() -* int {java11-javadoc}/java.base/java/util/Calendar.html#getLeastMaximum(int)[getLeastMaximum](int) -* int {java11-javadoc}/java.base/java/util/Calendar.html#getMaximum(int)[getMaximum](int) -* int {java11-javadoc}/java.base/java/util/Calendar.html#getMinimalDaysInFirstWeek()[getMinimalDaysInFirstWeek]() -* int {java11-javadoc}/java.base/java/util/Calendar.html#getMinimum(int)[getMinimum](int) -* Date {java11-javadoc}/java.base/java/util/Calendar.html#getTime()[getTime]() -* long {java11-javadoc}/java.base/java/util/Calendar.html#getTimeInMillis()[getTimeInMillis]() -* TimeZone {java11-javadoc}/java.base/java/util/Calendar.html#getTimeZone()[getTimeZone]() -* int {java11-javadoc}/java.base/java/util/Calendar.html#getWeekYear()[getWeekYear]() -* int {java11-javadoc}/java.base/java/util/Calendar.html#getWeeksInWeekYear()[getWeeksInWeekYear]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/GregorianCalendar.html#isLeapYear(int)[isLeapYear](int) -* boolean {java11-javadoc}/java.base/java/util/Calendar.html#isLenient()[isLenient]() -* boolean {java11-javadoc}/java.base/java/util/Calendar.html#isSet(int)[isSet](int) -* boolean {java11-javadoc}/java.base/java/util/Calendar.html#isWeekDateSupported()[isWeekDateSupported]() -* void {java11-javadoc}/java.base/java/util/Calendar.html#roll(int,int)[roll](int, int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#set(int,int)[set](int, int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#set(int,int,int)[set](int, int, int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#set(int,int,int,int,int)[set](int, int, int, int, int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#set(int,int,int,int,int,int)[set](int, int, int, int, int, int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setFirstDayOfWeek(int)[setFirstDayOfWeek](int) -* void {java11-javadoc}/java.base/java/util/GregorianCalendar.html#setGregorianChange(java.util.Date)[setGregorianChange](Date) -* void 
{java11-javadoc}/java.base/java/util/Calendar.html#setLenient(boolean)[setLenient](boolean) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setMinimalDaysInFirstWeek(int)[setMinimalDaysInFirstWeek](int) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setTime(java.util.Date)[setTime](Date) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setTimeInMillis(long)[setTimeInMillis](long) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setTimeZone(java.util.TimeZone)[setTimeZone](TimeZone) -* void {java11-javadoc}/java.base/java/util/Calendar.html#setWeekDate(int,int,int)[setWeekDate](int, int, int) -* Instant {java11-javadoc}/java.base/java/util/Calendar.html#toInstant()[toInstant]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* ZonedDateTime {java11-javadoc}/java.base/java/util/GregorianCalendar.html#toZonedDateTime()[toZonedDateTime]() - - -[[painless-api-reference-shared-HashMap]] -==== HashMap -* {java11-javadoc}/java.base/java/util/HashMap.html#()[HashMap]() -* {java11-javadoc}/java.base/java/util/HashMap.html#(java.util.Map)[HashMap](Map) -* void {java11-javadoc}/java.base/java/util/Map.html#clear()[clear]() -* def {java11-javadoc}/java.base/java/util/HashMap.html#clone()[clone]() -* List collect(BiFunction) -* def collect(Collection, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#compute(java.lang.Object,java.util.function.BiFunction)[compute](def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfAbsent(java.lang.Object,java.util.function.Function)[computeIfAbsent](def, Function) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfPresent(java.lang.Object,java.util.function.BiFunction)[computeIfPresent](def, BiFunction) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsKey(java.lang.Object)[containsKey](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsValue(java.lang.Object)[containsValue](def) -* int count(BiPredicate) -* def each(BiConsumer) -* Set {java11-javadoc}/java.base/java/util/Map.html#entrySet()[entrySet]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#equals(java.lang.Object)[equals](Object) -* boolean every(BiPredicate) -* Map.Entry find(BiPredicate) -* Map findAll(BiPredicate) -* def findResult(BiFunction) -* def findResult(def, BiFunction) -* List findResults(BiFunction) -* void {java11-javadoc}/java.base/java/util/Map.html#forEach(java.util.function.BiConsumer)[forEach](BiConsumer) -* def {java11-javadoc}/java.base/java/util/Map.html#get(java.lang.Object)[get](def) -* Object getByPath(null) -* Object getByPath(null, Object) -* def {java11-javadoc}/java.base/java/util/Map.html#getOrDefault(java.lang.Object,java.lang.Object)[getOrDefault](def, def) -* Map groupBy(BiFunction) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#isEmpty()[isEmpty]() -* Set {java11-javadoc}/java.base/java/util/Map.html#keySet()[keySet]() -* def {java11-javadoc}/java.base/java/util/Map.html#merge(java.lang.Object,java.lang.Object,java.util.function.BiFunction)[merge](def, def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#put(java.lang.Object,java.lang.Object)[put](def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#putAll(java.util.Map)[putAll](Map) -* def {java11-javadoc}/java.base/java/util/Map.html#putIfAbsent(java.lang.Object,java.lang.Object)[putIfAbsent](def, def) -* def 
{java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object,java.lang.Object)[remove](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object)[replace](def, def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object,java.lang.Object)[replace](def, def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#replaceAll(java.util.function.BiFunction)[replaceAll](BiFunction) -* int {java11-javadoc}/java.base/java/util/Map.html#size()[size]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Collection {java11-javadoc}/java.base/java/util/Map.html#values()[values]() - - -[[painless-api-reference-shared-HashSet]] -==== HashSet -* {java11-javadoc}/java.base/java/util/HashSet.html#()[HashSet]() -* {java11-javadoc}/java.base/java/util/HashSet.html#(java.util.Collection)[HashSet](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* def {java11-javadoc}/java.base/java/util/HashSet.html#clone()[clone]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/Set.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/Set.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* boolean {java11-javadoc}/java.base/java/util/Set.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - 
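The HashMap and HashSet listings above are the whitelisted `java.util` members, so they map directly onto the standard JDK classes. A minimal Java sketch of a few of them (`merge`, `getOrDefault`, `add`); the class name and sample values are illustrative only:

[source,java]
----
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Illustrative only: exercises a handful of the Map/Set methods listed above.
public class MapSetSketch {
    public static void main(String[] args) {
        Map<String, Integer> counts = new HashMap<>();
        for (String word : new String[] { "a", "b", "a" }) {
            counts.merge(word, 1, Integer::sum);        // merge(def, def, BiFunction) in the listing
        }
        int missing = counts.getOrDefault("z", 0);      // 0: key is absent

        Set<String> seen = new HashSet<>(counts.keySet());
        boolean firstTime = seen.add("c");              // true: "c" was not present yet
        System.out.println(counts + " missing=" + missing + " firstTime=" + firstTime);
    }
}
----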
-[[painless-api-reference-shared-Hashtable]] -==== Hashtable -* {java11-javadoc}/java.base/java/util/Hashtable.html#()[Hashtable]() -* {java11-javadoc}/java.base/java/util/Hashtable.html#(java.util.Map)[Hashtable](Map) -* void {java11-javadoc}/java.base/java/util/Map.html#clear()[clear]() -* def {java11-javadoc}/java.base/java/util/Hashtable.html#clone()[clone]() -* List collect(BiFunction) -* def collect(Collection, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#compute(java.lang.Object,java.util.function.BiFunction)[compute](def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfAbsent(java.lang.Object,java.util.function.Function)[computeIfAbsent](def, Function) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfPresent(java.lang.Object,java.util.function.BiFunction)[computeIfPresent](def, BiFunction) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsKey(java.lang.Object)[containsKey](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsValue(java.lang.Object)[containsValue](def) -* int count(BiPredicate) -* def each(BiConsumer) -* Enumeration {java11-javadoc}/java.base/java/util/Dictionary.html#elements()[elements]() -* Set {java11-javadoc}/java.base/java/util/Map.html#entrySet()[entrySet]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#equals(java.lang.Object)[equals](Object) -* boolean every(BiPredicate) -* Map.Entry find(BiPredicate) -* Map findAll(BiPredicate) -* def findResult(BiFunction) -* def findResult(def, BiFunction) -* List findResults(BiFunction) -* void {java11-javadoc}/java.base/java/util/Map.html#forEach(java.util.function.BiConsumer)[forEach](BiConsumer) -* def {java11-javadoc}/java.base/java/util/Map.html#get(java.lang.Object)[get](def) -* Object getByPath(null) -* Object getByPath(null, Object) -* def {java11-javadoc}/java.base/java/util/Map.html#getOrDefault(java.lang.Object,java.lang.Object)[getOrDefault](def, def) -* Map groupBy(BiFunction) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#isEmpty()[isEmpty]() -* Set {java11-javadoc}/java.base/java/util/Map.html#keySet()[keySet]() -* Enumeration {java11-javadoc}/java.base/java/util/Dictionary.html#keys()[keys]() -* def {java11-javadoc}/java.base/java/util/Map.html#merge(java.lang.Object,java.lang.Object,java.util.function.BiFunction)[merge](def, def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#put(java.lang.Object,java.lang.Object)[put](def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#putAll(java.util.Map)[putAll](Map) -* def {java11-javadoc}/java.base/java/util/Map.html#putIfAbsent(java.lang.Object,java.lang.Object)[putIfAbsent](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object,java.lang.Object)[remove](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object)[replace](def, def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object,java.lang.Object)[replace](def, def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#replaceAll(java.util.function.BiFunction)[replaceAll](BiFunction) -* int {java11-javadoc}/java.base/java/util/Map.html#size()[size]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Collection 
{java11-javadoc}/java.base/java/util/Map.html#values()[values]() - - -[[painless-api-reference-shared-IdentityHashMap]] -==== IdentityHashMap -* {java11-javadoc}/java.base/java/util/IdentityHashMap.html#()[IdentityHashMap]() -* {java11-javadoc}/java.base/java/util/IdentityHashMap.html#(java.util.Map)[IdentityHashMap](Map) -* void {java11-javadoc}/java.base/java/util/Map.html#clear()[clear]() -* def {java11-javadoc}/java.base/java/util/IdentityHashMap.html#clone()[clone]() -* List collect(BiFunction) -* def collect(Collection, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#compute(java.lang.Object,java.util.function.BiFunction)[compute](def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfAbsent(java.lang.Object,java.util.function.Function)[computeIfAbsent](def, Function) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfPresent(java.lang.Object,java.util.function.BiFunction)[computeIfPresent](def, BiFunction) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsKey(java.lang.Object)[containsKey](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsValue(java.lang.Object)[containsValue](def) -* int count(BiPredicate) -* def each(BiConsumer) -* Set {java11-javadoc}/java.base/java/util/Map.html#entrySet()[entrySet]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#equals(java.lang.Object)[equals](Object) -* boolean every(BiPredicate) -* Map.Entry find(BiPredicate) -* Map findAll(BiPredicate) -* def findResult(BiFunction) -* def findResult(def, BiFunction) -* List findResults(BiFunction) -* void {java11-javadoc}/java.base/java/util/Map.html#forEach(java.util.function.BiConsumer)[forEach](BiConsumer) -* def {java11-javadoc}/java.base/java/util/Map.html#get(java.lang.Object)[get](def) -* Object getByPath(null) -* Object getByPath(null, Object) -* def {java11-javadoc}/java.base/java/util/Map.html#getOrDefault(java.lang.Object,java.lang.Object)[getOrDefault](def, def) -* Map groupBy(BiFunction) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#isEmpty()[isEmpty]() -* Set {java11-javadoc}/java.base/java/util/Map.html#keySet()[keySet]() -* def {java11-javadoc}/java.base/java/util/Map.html#merge(java.lang.Object,java.lang.Object,java.util.function.BiFunction)[merge](def, def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#put(java.lang.Object,java.lang.Object)[put](def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#putAll(java.util.Map)[putAll](Map) -* def {java11-javadoc}/java.base/java/util/Map.html#putIfAbsent(java.lang.Object,java.lang.Object)[putIfAbsent](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object,java.lang.Object)[remove](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object)[replace](def, def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object,java.lang.Object)[replace](def, def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#replaceAll(java.util.function.BiFunction)[replaceAll](BiFunction) -* int {java11-javadoc}/java.base/java/util/Map.html#size()[size]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Collection {java11-javadoc}/java.base/java/util/Map.html#values()[values]() - - 
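One behavioral difference worth calling out for the listing above: `IdentityHashMap` matches keys by reference equality (`==`) rather than `equals`, unlike `HashMap` and `Hashtable`. A small Java sketch with deliberately duplicated key strings; names and values are illustrative:

[source,java]
----
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;

// Illustrative only: identical put(def, def) calls, different key-matching rules.
public class IdentityMapSketch {
    public static void main(String[] args) {
        String k1 = new String("key");
        String k2 = new String("key");   // equal by equals(), distinct by ==

        Map<String, Integer> byEquals = new HashMap<>();
        byEquals.put(k1, 1);
        byEquals.put(k2, 2);             // replaces the first entry

        Map<String, Integer> byIdentity = new IdentityHashMap<>();
        byIdentity.put(k1, 1);
        byIdentity.put(k2, 2);           // kept as a second, separate entry

        System.out.println(byEquals.size() + " vs " + byIdentity.size()); // prints "1 vs 2"
    }
}
----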
-[[painless-api-reference-shared-IllegalFormatCodePointException]] -==== IllegalFormatCodePointException -* {java11-javadoc}/java.base/java/util/IllegalFormatCodePointException.html#(int)[IllegalFormatCodePointException](int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/util/IllegalFormatCodePointException.html#getCodePoint()[getCodePoint]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IllegalFormatConversionException]] -==== IllegalFormatConversionException -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* char {java11-javadoc}/java.base/java/util/IllegalFormatConversionException.html#getConversion()[getConversion]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IllegalFormatException]] -==== IllegalFormatException -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IllegalFormatFlagsException]] -==== IllegalFormatFlagsException -* {java11-javadoc}/java.base/java/util/IllegalFormatFlagsException.html#(java.lang.String)[IllegalFormatFlagsException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/util/IllegalFormatFlagsException.html#getFlags()[getFlags]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IllegalFormatPrecisionException]] -==== IllegalFormatPrecisionException -* {java11-javadoc}/java.base/java/util/IllegalFormatPrecisionException.html#(int)[IllegalFormatPrecisionException](int) -* boolean 
{java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* int {java11-javadoc}/java.base/java/util/IllegalFormatPrecisionException.html#getPrecision()[getPrecision]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IllegalFormatWidthException]] -==== IllegalFormatWidthException -* {java11-javadoc}/java.base/java/util/IllegalFormatWidthException.html#(int)[IllegalFormatWidthException](int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/util/IllegalFormatWidthException.html#getWidth()[getWidth]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IllformedLocaleException]] -==== IllformedLocaleException -* {java11-javadoc}/java.base/java/util/IllformedLocaleException.html#()[IllformedLocaleException]() -* {java11-javadoc}/java.base/java/util/IllformedLocaleException.html#(java.lang.String)[IllformedLocaleException](null) -* {java11-javadoc}/java.base/java/util/IllformedLocaleException.html#(java.lang.String,int)[IllformedLocaleException](null, int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/util/IllformedLocaleException.html#getErrorIndex()[getErrorIndex]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-InputMismatchException]] -==== InputMismatchException -* {java11-javadoc}/java.base/java/util/InputMismatchException.html#()[InputMismatchException]() -* {java11-javadoc}/java.base/java/util/InputMismatchException.html#(java.lang.String)[InputMismatchException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - 
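The exception types listed above typically surface from formatting and scanning calls rather than from user code. A hedged Java sketch (the format string is intentionally invalid) showing `IllegalFormatPrecisionException` and its `getPrecision()` accessor:

[source,java]
----
import java.util.Formatter;
import java.util.IllegalFormatPrecisionException;

// Illustrative only: "%.3d" is invalid because %d does not accept a precision.
public class FormatExceptionSketch {
    public static void main(String[] args) {
        try (Formatter f = new Formatter()) {
            f.format("%.3d", 42);        // throws before anything is written
        } catch (IllegalFormatPrecisionException e) {
            System.out.println("rejected precision: " + e.getPrecision()); // 3
        }
    }
}
----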
-[[painless-api-reference-shared-IntSummaryStatistics]] -==== IntSummaryStatistics -* {java11-javadoc}/java.base/java/util/IntSummaryStatistics.html#()[IntSummaryStatistics]() -* void {java11-javadoc}/java.base/java/util/function/IntConsumer.html#accept(int)[accept](int) -* IntConsumer {java11-javadoc}/java.base/java/util/function/IntConsumer.html#andThen(java.util.function.IntConsumer)[andThen](IntConsumer) -* void {java11-javadoc}/java.base/java/util/IntSummaryStatistics.html#combine(java.util.IntSummaryStatistics)[combine](IntSummaryStatistics) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* double {java11-javadoc}/java.base/java/util/IntSummaryStatistics.html#getAverage()[getAverage]() -* long {java11-javadoc}/java.base/java/util/IntSummaryStatistics.html#getCount()[getCount]() -* int {java11-javadoc}/java.base/java/util/IntSummaryStatistics.html#getMax()[getMax]() -* int {java11-javadoc}/java.base/java/util/IntSummaryStatistics.html#getMin()[getMin]() -* long {java11-javadoc}/java.base/java/util/IntSummaryStatistics.html#getSum()[getSum]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Iterator]] -==== Iterator -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* void {java11-javadoc}/java.base/java/util/Iterator.html#forEachRemaining(java.util.function.Consumer)[forEachRemaining](Consumer) -* boolean {java11-javadoc}/java.base/java/util/Iterator.html#hasNext()[hasNext]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* def {java11-javadoc}/java.base/java/util/Iterator.html#next()[next]() -* void {java11-javadoc}/java.base/java/util/Iterator.html#remove()[remove]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-LinkedHashMap]] -==== LinkedHashMap -* {java11-javadoc}/java.base/java/util/LinkedHashMap.html#()[LinkedHashMap]() -* {java11-javadoc}/java.base/java/util/LinkedHashMap.html#(java.util.Map)[LinkedHashMap](Map) -* void {java11-javadoc}/java.base/java/util/Map.html#clear()[clear]() -* def {java11-javadoc}/java.base/java/util/HashMap.html#clone()[clone]() -* List collect(BiFunction) -* def collect(Collection, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#compute(java.lang.Object,java.util.function.BiFunction)[compute](def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfAbsent(java.lang.Object,java.util.function.Function)[computeIfAbsent](def, Function) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfPresent(java.lang.Object,java.util.function.BiFunction)[computeIfPresent](def, BiFunction) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsKey(java.lang.Object)[containsKey](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsValue(java.lang.Object)[containsValue](def) -* int count(BiPredicate) -* def each(BiConsumer) -* Set {java11-javadoc}/java.base/java/util/Map.html#entrySet()[entrySet]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#equals(java.lang.Object)[equals](Object) -* boolean every(BiPredicate) -* Map.Entry find(BiPredicate) -* Map findAll(BiPredicate) -* def findResult(BiFunction) -* def findResult(def, BiFunction) -* List findResults(BiFunction) -* void 
{java11-javadoc}/java.base/java/util/Map.html#forEach(java.util.function.BiConsumer)[forEach](BiConsumer) -* def {java11-javadoc}/java.base/java/util/Map.html#get(java.lang.Object)[get](def) -* Object getByPath(null) -* Object getByPath(null, Object) -* def {java11-javadoc}/java.base/java/util/Map.html#getOrDefault(java.lang.Object,java.lang.Object)[getOrDefault](def, def) -* Map groupBy(BiFunction) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#isEmpty()[isEmpty]() -* Set {java11-javadoc}/java.base/java/util/Map.html#keySet()[keySet]() -* def {java11-javadoc}/java.base/java/util/Map.html#merge(java.lang.Object,java.lang.Object,java.util.function.BiFunction)[merge](def, def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#put(java.lang.Object,java.lang.Object)[put](def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#putAll(java.util.Map)[putAll](Map) -* def {java11-javadoc}/java.base/java/util/Map.html#putIfAbsent(java.lang.Object,java.lang.Object)[putIfAbsent](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object,java.lang.Object)[remove](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object)[replace](def, def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object,java.lang.Object)[replace](def, def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#replaceAll(java.util.function.BiFunction)[replaceAll](BiFunction) -* int {java11-javadoc}/java.base/java/util/Map.html#size()[size]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Collection {java11-javadoc}/java.base/java/util/Map.html#values()[values]() - - -[[painless-api-reference-shared-LinkedHashSet]] -==== LinkedHashSet -* {java11-javadoc}/java.base/java/util/LinkedHashSet.html#()[LinkedHashSet]() -* {java11-javadoc}/java.base/java/util/LinkedHashSet.html#(java.util.Collection)[LinkedHashSet](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* def {java11-javadoc}/java.base/java/util/HashSet.html#clone()[clone]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/Set.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/Set.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator 
{java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* boolean {java11-javadoc}/java.base/java/util/Set.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-LinkedList]] -==== LinkedList -* {java11-javadoc}/java.base/java/util/LinkedList.html#()[LinkedList]() -* {java11-javadoc}/java.base/java/util/LinkedList.html#(java.util.Collection)[LinkedList](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* void {java11-javadoc}/java.base/java/util/Deque.html#addFirst(java.lang.Object)[addFirst](def) -* void {java11-javadoc}/java.base/java/util/Deque.html#addLast(java.lang.Object)[addLast](def) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* def {java11-javadoc}/java.base/java/util/LinkedList.html#clone()[clone]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* Iterator {java11-javadoc}/java.base/java/util/Deque.html#descendingIterator()[descendingIterator]() -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* def {java11-javadoc}/java.base/java/util/Queue.html#element()[element]() -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* def {java11-javadoc}/java.base/java/util/List.html#get(int)[get](int) -* Object getByPath(null) -* Object getByPath(null, Object) -* def {java11-javadoc}/java.base/java/util/Deque.html#getFirst()[getFirst]() -* def {java11-javadoc}/java.base/java/util/Deque.html#getLast()[getLast]() -* int getLength() -* Map groupBy(Function) -* int 
{java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* boolean {java11-javadoc}/java.base/java/util/Queue.html#offer(java.lang.Object)[offer](def) -* boolean {java11-javadoc}/java.base/java/util/Deque.html#offerFirst(java.lang.Object)[offerFirst](def) -* boolean {java11-javadoc}/java.base/java/util/Deque.html#offerLast(java.lang.Object)[offerLast](def) -* def {java11-javadoc}/java.base/java/util/Queue.html#peek()[peek]() -* def {java11-javadoc}/java.base/java/util/Deque.html#peekFirst()[peekFirst]() -* def {java11-javadoc}/java.base/java/util/Deque.html#peekLast()[peekLast]() -* def {java11-javadoc}/java.base/java/util/Queue.html#poll()[poll]() -* def {java11-javadoc}/java.base/java/util/Deque.html#pollFirst()[pollFirst]() -* def {java11-javadoc}/java.base/java/util/Deque.html#pollLast()[pollLast]() -* def {java11-javadoc}/java.base/java/util/Deque.html#pop()[pop]() -* void {java11-javadoc}/java.base/java/util/Deque.html#push(java.lang.Object)[push](def) -* def {java11-javadoc}/java.base/java/util/Queue.html#remove()[remove]() -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* def {java11-javadoc}/java.base/java/util/Deque.html#removeFirst()[removeFirst]() -* boolean {java11-javadoc}/java.base/java/util/Deque.html#removeFirstOccurrence(java.lang.Object)[removeFirstOccurrence](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* def {java11-javadoc}/java.base/java/util/Deque.html#removeLast()[removeLast]() -* boolean {java11-javadoc}/java.base/java/util/Deque.html#removeLastOccurrence(java.lang.Object)[removeLastOccurrence](def) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - 
-[[painless-api-reference-shared-List]] -==== List -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* def {java11-javadoc}/java.base/java/util/List.html#get(int)[get](int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] 
{java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ListIterator]] -==== ListIterator -* void {java11-javadoc}/java.base/java/util/ListIterator.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* void {java11-javadoc}/java.base/java/util/Iterator.html#forEachRemaining(java.util.function.Consumer)[forEachRemaining](Consumer) -* boolean {java11-javadoc}/java.base/java/util/Iterator.html#hasNext()[hasNext]() -* boolean {java11-javadoc}/java.base/java/util/ListIterator.html#hasPrevious()[hasPrevious]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* def {java11-javadoc}/java.base/java/util/Iterator.html#next()[next]() -* int {java11-javadoc}/java.base/java/util/ListIterator.html#nextIndex()[nextIndex]() -* int {java11-javadoc}/java.base/java/util/ListIterator.html#previousIndex()[previousIndex]() -* void {java11-javadoc}/java.base/java/util/Iterator.html#remove()[remove]() -* void {java11-javadoc}/java.base/java/util/ListIterator.html#set(java.lang.Object)[set](def) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Locale]] -==== Locale -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#CANADA[CANADA] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#CANADA_FRENCH[CANADA_FRENCH] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#CHINA[CHINA] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#CHINESE[CHINESE] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#ENGLISH[ENGLISH] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#FRANCE[FRANCE] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#FRENCH[FRENCH] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#GERMAN[GERMAN] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#GERMANY[GERMANY] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#ITALIAN[ITALIAN] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#ITALY[ITALY] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#JAPAN[JAPAN] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#JAPANESE[JAPANESE] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#KOREA[KOREA] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#KOREAN[KOREAN] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#PRC[PRC] -* static char {java11-javadoc}/java.base/java/util/Locale.html#PRIVATE_USE_EXTENSION[PRIVATE_USE_EXTENSION] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#ROOT[ROOT] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#SIMPLIFIED_CHINESE[SIMPLIFIED_CHINESE] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#TAIWAN[TAIWAN] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#TRADITIONAL_CHINESE[TRADITIONAL_CHINESE] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#UK[UK] -* static char {java11-javadoc}/java.base/java/util/Locale.html#UNICODE_LOCALE_EXTENSION[UNICODE_LOCALE_EXTENSION] -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#US[US] -* static List 
{java11-javadoc}/java.base/java/util/Locale.html#filter(java.util.List,java.util.Collection)[filter](List, Collection) -* static List {java11-javadoc}/java.base/java/util/Locale.html#filterTags(java.util.List,java.util.Collection)[filterTags](List, Collection) -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#forLanguageTag(java.lang.String)[forLanguageTag](null) -* static Locale[] {java11-javadoc}/java.base/java/util/Locale.html#getAvailableLocales()[getAvailableLocales]() -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#getDefault()[getDefault]() -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#getDefault(java.util.Locale$Category)[getDefault](Locale.Category) -* static null[] {java11-javadoc}/java.base/java/util/Locale.html#getISOCountries()[getISOCountries]() -* static null[] {java11-javadoc}/java.base/java/util/Locale.html#getISOLanguages()[getISOLanguages]() -* static Locale {java11-javadoc}/java.base/java/util/Locale.html#lookup(java.util.List,java.util.Collection)[lookup](List, Collection) -* static null {java11-javadoc}/java.base/java/util/Locale.html#lookupTag(java.util.List,java.util.Collection)[lookupTag](List, Collection) -* {java11-javadoc}/java.base/java/util/Locale.html#(java.lang.String)[Locale](null) -* {java11-javadoc}/java.base/java/util/Locale.html#(java.lang.String,java.lang.String)[Locale](null, null) -* {java11-javadoc}/java.base/java/util/Locale.html#(java.lang.String,java.lang.String,java.lang.String)[Locale](null, null, null) -* def {java11-javadoc}/java.base/java/util/Locale.html#clone()[clone]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/util/Locale.html#getCountry()[getCountry]() -* null {java11-javadoc}/java.base/java/util/Locale.html#getDisplayCountry()[getDisplayCountry]() -* null {java11-javadoc}/java.base/java/util/Locale.html#getDisplayCountry(java.util.Locale)[getDisplayCountry](Locale) -* null {java11-javadoc}/java.base/java/util/Locale.html#getDisplayLanguage()[getDisplayLanguage]() -* null {java11-javadoc}/java.base/java/util/Locale.html#getDisplayLanguage(java.util.Locale)[getDisplayLanguage](Locale) -* null {java11-javadoc}/java.base/java/util/Locale.html#getDisplayName()[getDisplayName]() -* null {java11-javadoc}/java.base/java/util/Locale.html#getDisplayName(java.util.Locale)[getDisplayName](Locale) -* null {java11-javadoc}/java.base/java/util/Locale.html#getDisplayScript()[getDisplayScript]() -* null {java11-javadoc}/java.base/java/util/Locale.html#getDisplayScript(java.util.Locale)[getDisplayScript](Locale) -* null {java11-javadoc}/java.base/java/util/Locale.html#getDisplayVariant()[getDisplayVariant]() -* null {java11-javadoc}/java.base/java/util/Locale.html#getDisplayVariant(java.util.Locale)[getDisplayVariant](Locale) -* null {java11-javadoc}/java.base/java/util/Locale.html#getExtension(char)[getExtension](char) -* Set {java11-javadoc}/java.base/java/util/Locale.html#getExtensionKeys()[getExtensionKeys]() -* null {java11-javadoc}/java.base/java/util/Locale.html#getISO3Country()[getISO3Country]() -* null {java11-javadoc}/java.base/java/util/Locale.html#getISO3Language()[getISO3Language]() -* null {java11-javadoc}/java.base/java/util/Locale.html#getLanguage()[getLanguage]() -* null {java11-javadoc}/java.base/java/util/Locale.html#getScript()[getScript]() -* Set {java11-javadoc}/java.base/java/util/Locale.html#getUnicodeLocaleAttributes()[getUnicodeLocaleAttributes]() -* Set 
{java11-javadoc}/java.base/java/util/Locale.html#getUnicodeLocaleKeys()[getUnicodeLocaleKeys]() -* null {java11-javadoc}/java.base/java/util/Locale.html#getUnicodeLocaleType(java.lang.String)[getUnicodeLocaleType](null) -* null {java11-javadoc}/java.base/java/util/Locale.html#getVariant()[getVariant]() -* boolean {java11-javadoc}/java.base/java/util/Locale.html#hasExtensions()[hasExtensions]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Locale {java11-javadoc}/java.base/java/util/Locale.html#stripExtensions()[stripExtensions]() -* null {java11-javadoc}/java.base/java/util/Locale.html#toLanguageTag()[toLanguageTag]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Locale-Builder]] -==== Locale.Builder -* {java11-javadoc}/java.base/java/util/Locale$Builder.html#()[Locale.Builder]() -* Locale.Builder {java11-javadoc}/java.base/java/util/Locale$Builder.html#addUnicodeLocaleAttribute(java.lang.String)[addUnicodeLocaleAttribute](null) -* Locale {java11-javadoc}/java.base/java/util/Locale$Builder.html#build()[build]() -* Locale.Builder {java11-javadoc}/java.base/java/util/Locale$Builder.html#clear()[clear]() -* Locale.Builder {java11-javadoc}/java.base/java/util/Locale$Builder.html#clearExtensions()[clearExtensions]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Locale.Builder {java11-javadoc}/java.base/java/util/Locale$Builder.html#removeUnicodeLocaleAttribute(java.lang.String)[removeUnicodeLocaleAttribute](null) -* Locale.Builder {java11-javadoc}/java.base/java/util/Locale$Builder.html#setExtension(char,java.lang.String)[setExtension](char, null) -* Locale.Builder {java11-javadoc}/java.base/java/util/Locale$Builder.html#setLanguage(java.lang.String)[setLanguage](null) -* Locale.Builder {java11-javadoc}/java.base/java/util/Locale$Builder.html#setLanguageTag(java.lang.String)[setLanguageTag](null) -* Locale.Builder {java11-javadoc}/java.base/java/util/Locale$Builder.html#setLocale(java.util.Locale)[setLocale](Locale) -* Locale.Builder {java11-javadoc}/java.base/java/util/Locale$Builder.html#setRegion(java.lang.String)[setRegion](null) -* Locale.Builder {java11-javadoc}/java.base/java/util/Locale$Builder.html#setScript(java.lang.String)[setScript](null) -* Locale.Builder {java11-javadoc}/java.base/java/util/Locale$Builder.html#setUnicodeLocaleKeyword(java.lang.String,java.lang.String)[setUnicodeLocaleKeyword](null, null) -* Locale.Builder {java11-javadoc}/java.base/java/util/Locale$Builder.html#setVariant(java.lang.String)[setVariant](null) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Locale-Category]] -==== Locale.Category -* static Locale.Category {java11-javadoc}/java.base/java/util/Locale$Category.html#DISPLAY[DISPLAY] -* static Locale.Category {java11-javadoc}/java.base/java/util/Locale$Category.html#FORMAT[FORMAT] -* static Locale.Category {java11-javadoc}/java.base/java/util/Locale$Category.html#valueOf(java.lang.String)[valueOf](null) -* static Locale.Category[] {java11-javadoc}/java.base/java/util/Locale$Category.html#values()[values]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int 
{java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Locale-FilteringMode]] -==== Locale.FilteringMode -* static Locale.FilteringMode {java11-javadoc}/java.base/java/util/Locale$FilteringMode.html#AUTOSELECT_FILTERING[AUTOSELECT_FILTERING] -* static Locale.FilteringMode {java11-javadoc}/java.base/java/util/Locale$FilteringMode.html#EXTENDED_FILTERING[EXTENDED_FILTERING] -* static Locale.FilteringMode {java11-javadoc}/java.base/java/util/Locale$FilteringMode.html#IGNORE_EXTENDED_RANGES[IGNORE_EXTENDED_RANGES] -* static Locale.FilteringMode {java11-javadoc}/java.base/java/util/Locale$FilteringMode.html#MAP_EXTENDED_RANGES[MAP_EXTENDED_RANGES] -* static Locale.FilteringMode {java11-javadoc}/java.base/java/util/Locale$FilteringMode.html#REJECT_EXTENDED_RANGES[REJECT_EXTENDED_RANGES] -* static Locale.FilteringMode {java11-javadoc}/java.base/java/util/Locale$FilteringMode.html#valueOf(java.lang.String)[valueOf](null) -* static Locale.FilteringMode[] {java11-javadoc}/java.base/java/util/Locale$FilteringMode.html#values()[values]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Locale-LanguageRange]] -==== Locale.LanguageRange -* static double {java11-javadoc}/java.base/java/util/Locale$LanguageRange.html#MAX_WEIGHT[MAX_WEIGHT] -* static double {java11-javadoc}/java.base/java/util/Locale$LanguageRange.html#MIN_WEIGHT[MIN_WEIGHT] -* static List {java11-javadoc}/java.base/java/util/Locale$LanguageRange.html#mapEquivalents(java.util.List,java.util.Map)[mapEquivalents](List, Map) -* static List {java11-javadoc}/java.base/java/util/Locale$LanguageRange.html#parse(java.lang.String)[parse](null) -* static List {java11-javadoc}/java.base/java/util/Locale$LanguageRange.html#parse(java.lang.String,java.util.Map)[parse](null, Map) -* {java11-javadoc}/java.base/java/util/Locale$LanguageRange.html#(java.lang.String)[Locale.LanguageRange](null) -* {java11-javadoc}/java.base/java/util/Locale$LanguageRange.html#(java.lang.String,double)[Locale.LanguageRange](null, double) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/util/Locale$LanguageRange.html#getRange()[getRange]() -* double {java11-javadoc}/java.base/java/util/Locale$LanguageRange.html#getWeight()[getWeight]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-LongSummaryStatistics]] -==== LongSummaryStatistics -* {java11-javadoc}/java.base/java/util/LongSummaryStatistics.html#()[LongSummaryStatistics]() -* void {java11-javadoc}/java.base/java/util/function/LongConsumer.html#accept(long)[accept](long) -* LongConsumer 
{java11-javadoc}/java.base/java/util/function/LongConsumer.html#andThen(java.util.function.LongConsumer)[andThen](LongConsumer) -* void {java11-javadoc}/java.base/java/util/LongSummaryStatistics.html#combine(java.util.LongSummaryStatistics)[combine](LongSummaryStatistics) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* double {java11-javadoc}/java.base/java/util/LongSummaryStatistics.html#getAverage()[getAverage]() -* long {java11-javadoc}/java.base/java/util/LongSummaryStatistics.html#getCount()[getCount]() -* long {java11-javadoc}/java.base/java/util/LongSummaryStatistics.html#getMax()[getMax]() -* long {java11-javadoc}/java.base/java/util/LongSummaryStatistics.html#getMin()[getMin]() -* long {java11-javadoc}/java.base/java/util/LongSummaryStatistics.html#getSum()[getSum]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Map]] -==== Map -* void {java11-javadoc}/java.base/java/util/Map.html#clear()[clear]() -* List collect(BiFunction) -* def collect(Collection, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#compute(java.lang.Object,java.util.function.BiFunction)[compute](def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfAbsent(java.lang.Object,java.util.function.Function)[computeIfAbsent](def, Function) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfPresent(java.lang.Object,java.util.function.BiFunction)[computeIfPresent](def, BiFunction) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsKey(java.lang.Object)[containsKey](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsValue(java.lang.Object)[containsValue](def) -* int count(BiPredicate) -* def each(BiConsumer) -* Set {java11-javadoc}/java.base/java/util/Map.html#entrySet()[entrySet]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#equals(java.lang.Object)[equals](Object) -* boolean every(BiPredicate) -* Map.Entry find(BiPredicate) -* Map findAll(BiPredicate) -* def findResult(BiFunction) -* def findResult(def, BiFunction) -* List findResults(BiFunction) -* void {java11-javadoc}/java.base/java/util/Map.html#forEach(java.util.function.BiConsumer)[forEach](BiConsumer) -* def {java11-javadoc}/java.base/java/util/Map.html#get(java.lang.Object)[get](def) -* Object getByPath(null) -* Object getByPath(null, Object) -* def {java11-javadoc}/java.base/java/util/Map.html#getOrDefault(java.lang.Object,java.lang.Object)[getOrDefault](def, def) -* Map groupBy(BiFunction) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#isEmpty()[isEmpty]() -* Set {java11-javadoc}/java.base/java/util/Map.html#keySet()[keySet]() -* def {java11-javadoc}/java.base/java/util/Map.html#merge(java.lang.Object,java.lang.Object,java.util.function.BiFunction)[merge](def, def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#put(java.lang.Object,java.lang.Object)[put](def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#putAll(java.util.Map)[putAll](Map) -* def {java11-javadoc}/java.base/java/util/Map.html#putIfAbsent(java.lang.Object,java.lang.Object)[putIfAbsent](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object)[remove](def) -* boolean 
{java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object,java.lang.Object)[remove](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object)[replace](def, def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object,java.lang.Object)[replace](def, def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#replaceAll(java.util.function.BiFunction)[replaceAll](BiFunction) -* int {java11-javadoc}/java.base/java/util/Map.html#size()[size]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Collection {java11-javadoc}/java.base/java/util/Map.html#values()[values]() - - -[[painless-api-reference-shared-Map-Entry]] -==== Map.Entry -* static Comparator {java11-javadoc}/java.base/java/util/Map$Entry.html#comparingByKey()[comparingByKey]() -* static Comparator {java11-javadoc}/java.base/java/util/Map$Entry.html#comparingByKey(java.util.Comparator)[comparingByKey](Comparator) -* static Comparator {java11-javadoc}/java.base/java/util/Map$Entry.html#comparingByValue()[comparingByValue]() -* static Comparator {java11-javadoc}/java.base/java/util/Map$Entry.html#comparingByValue(java.util.Comparator)[comparingByValue](Comparator) -* boolean {java11-javadoc}/java.base/java/util/Map$Entry.html#equals(java.lang.Object)[equals](Object) -* def {java11-javadoc}/java.base/java/util/Map$Entry.html#getKey()[getKey]() -* def {java11-javadoc}/java.base/java/util/Map$Entry.html#getValue()[getValue]() -* int {java11-javadoc}/java.base/java/util/Map$Entry.html#hashCode()[hashCode]() -* def {java11-javadoc}/java.base/java/util/Map$Entry.html#setValue(java.lang.Object)[setValue](def) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-MissingFormatArgumentException]] -==== MissingFormatArgumentException -* {java11-javadoc}/java.base/java/util/MissingFormatArgumentException.html#(java.lang.String)[MissingFormatArgumentException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/util/MissingFormatArgumentException.html#getFormatSpecifier()[getFormatSpecifier]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-MissingFormatWidthException]] -==== MissingFormatWidthException -* {java11-javadoc}/java.base/java/util/MissingFormatWidthException.html#(java.lang.String)[MissingFormatWidthException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/util/MissingFormatWidthException.html#getFormatSpecifier()[getFormatSpecifier]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() 
-* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-MissingResourceException]] -==== MissingResourceException -* {java11-javadoc}/java.base/java/util/MissingResourceException.html#(java.lang.String,java.lang.String,java.lang.String)[MissingResourceException](null, null, null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/util/MissingResourceException.html#getClassName()[getClassName]() -* null {java11-javadoc}/java.base/java/util/MissingResourceException.html#getKey()[getKey]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-NavigableMap]] -==== NavigableMap -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#ceilingEntry(java.lang.Object)[ceilingEntry](def) -* def {java11-javadoc}/java.base/java/util/NavigableMap.html#ceilingKey(java.lang.Object)[ceilingKey](def) -* void {java11-javadoc}/java.base/java/util/Map.html#clear()[clear]() -* List collect(BiFunction) -* def collect(Collection, BiFunction) -* Comparator {java11-javadoc}/java.base/java/util/SortedMap.html#comparator()[comparator]() -* def {java11-javadoc}/java.base/java/util/Map.html#compute(java.lang.Object,java.util.function.BiFunction)[compute](def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfAbsent(java.lang.Object,java.util.function.Function)[computeIfAbsent](def, Function) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfPresent(java.lang.Object,java.util.function.BiFunction)[computeIfPresent](def, BiFunction) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsKey(java.lang.Object)[containsKey](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsValue(java.lang.Object)[containsValue](def) -* int count(BiPredicate) -* NavigableSet {java11-javadoc}/java.base/java/util/NavigableMap.html#descendingKeySet()[descendingKeySet]() -* NavigableMap {java11-javadoc}/java.base/java/util/NavigableMap.html#descendingMap()[descendingMap]() -* def each(BiConsumer) -* Set {java11-javadoc}/java.base/java/util/Map.html#entrySet()[entrySet]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#equals(java.lang.Object)[equals](Object) -* boolean every(BiPredicate) -* Map.Entry find(BiPredicate) -* Map findAll(BiPredicate) -* def findResult(BiFunction) -* def findResult(def, BiFunction) -* List findResults(BiFunction) -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#firstEntry()[firstEntry]() -* def {java11-javadoc}/java.base/java/util/SortedMap.html#firstKey()[firstKey]() -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#floorEntry(java.lang.Object)[floorEntry](def) -* def {java11-javadoc}/java.base/java/util/NavigableMap.html#floorKey(java.lang.Object)[floorKey](def) -* void {java11-javadoc}/java.base/java/util/Map.html#forEach(java.util.function.BiConsumer)[forEach](BiConsumer) -* def {java11-javadoc}/java.base/java/util/Map.html#get(java.lang.Object)[get](def) -* Object getByPath(null) -* Object getByPath(null, 
Object) -* def {java11-javadoc}/java.base/java/util/Map.html#getOrDefault(java.lang.Object,java.lang.Object)[getOrDefault](def, def) -* Map groupBy(BiFunction) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* SortedMap {java11-javadoc}/java.base/java/util/SortedMap.html#headMap(java.lang.Object)[headMap](def) -* NavigableMap {java11-javadoc}/java.base/java/util/NavigableMap.html#headMap(java.lang.Object,boolean)[headMap](def, boolean) -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#higherEntry(java.lang.Object)[higherEntry](def) -* def {java11-javadoc}/java.base/java/util/NavigableMap.html#higherKey(java.lang.Object)[higherKey](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#isEmpty()[isEmpty]() -* Set {java11-javadoc}/java.base/java/util/Map.html#keySet()[keySet]() -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#lastEntry()[lastEntry]() -* def {java11-javadoc}/java.base/java/util/SortedMap.html#lastKey()[lastKey]() -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#lowerEntry(java.lang.Object)[lowerEntry](def) -* def {java11-javadoc}/java.base/java/util/Map.html#merge(java.lang.Object,java.lang.Object,java.util.function.BiFunction)[merge](def, def, BiFunction) -* NavigableSet {java11-javadoc}/java.base/java/util/NavigableMap.html#navigableKeySet()[navigableKeySet]() -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#pollFirstEntry()[pollFirstEntry]() -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#pollLastEntry()[pollLastEntry]() -* def {java11-javadoc}/java.base/java/util/Map.html#put(java.lang.Object,java.lang.Object)[put](def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#putAll(java.util.Map)[putAll](Map) -* def {java11-javadoc}/java.base/java/util/Map.html#putIfAbsent(java.lang.Object,java.lang.Object)[putIfAbsent](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object,java.lang.Object)[remove](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object)[replace](def, def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object,java.lang.Object)[replace](def, def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#replaceAll(java.util.function.BiFunction)[replaceAll](BiFunction) -* int {java11-javadoc}/java.base/java/util/Map.html#size()[size]() -* SortedMap {java11-javadoc}/java.base/java/util/SortedMap.html#subMap(java.lang.Object,java.lang.Object)[subMap](def, def) -* NavigableMap {java11-javadoc}/java.base/java/util/NavigableMap.html#subMap(java.lang.Object,boolean,java.lang.Object,boolean)[subMap](def, boolean, def, boolean) -* SortedMap {java11-javadoc}/java.base/java/util/SortedMap.html#tailMap(java.lang.Object)[tailMap](def) -* NavigableMap {java11-javadoc}/java.base/java/util/NavigableMap.html#tailMap(java.lang.Object,boolean)[tailMap](def, boolean) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Collection {java11-javadoc}/java.base/java/util/Map.html#values()[values]() - - -[[painless-api-reference-shared-NavigableSet]] -==== NavigableSet -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean any(Predicate) 
-* Collection asCollection() -* List asList() -* def {java11-javadoc}/java.base/java/util/NavigableSet.html#ceiling(java.lang.Object)[ceiling](def) -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* Comparator {java11-javadoc}/java.base/java/util/SortedSet.html#comparator()[comparator]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* Iterator {java11-javadoc}/java.base/java/util/NavigableSet.html#descendingIterator()[descendingIterator]() -* NavigableSet {java11-javadoc}/java.base/java/util/NavigableSet.html#descendingSet()[descendingSet]() -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/Set.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* def {java11-javadoc}/java.base/java/util/SortedSet.html#first()[first]() -* def {java11-javadoc}/java.base/java/util/NavigableSet.html#floor(java.lang.Object)[floor](def) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/Set.html#hashCode()[hashCode]() -* SortedSet {java11-javadoc}/java.base/java/util/SortedSet.html#headSet(java.lang.Object)[headSet](def) -* NavigableSet {java11-javadoc}/java.base/java/util/NavigableSet.html#headSet(java.lang.Object,boolean)[headSet](def, boolean) -* def {java11-javadoc}/java.base/java/util/NavigableSet.html#higher(java.lang.Object)[higher](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* def {java11-javadoc}/java.base/java/util/SortedSet.html#last()[last]() -* def {java11-javadoc}/java.base/java/util/NavigableSet.html#lower(java.lang.Object)[lower](def) -* def {java11-javadoc}/java.base/java/util/NavigableSet.html#pollFirst()[pollFirst]() -* def {java11-javadoc}/java.base/java/util/NavigableSet.html#pollLast()[pollLast]() -* boolean {java11-javadoc}/java.base/java/util/Set.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* SortedSet {java11-javadoc}/java.base/java/util/SortedSet.html#subSet(java.lang.Object,java.lang.Object)[subSet](def, def) -* NavigableSet {java11-javadoc}/java.base/java/util/NavigableSet.html#subSet(java.lang.Object,boolean,java.lang.Object,boolean)[subSet](def, boolean, def, boolean) -* double sum() -* double sum(ToDoubleFunction) -* SortedSet 
{java11-javadoc}/java.base/java/util/SortedSet.html#tailSet(java.lang.Object)[tailSet](def) -* NavigableSet {java11-javadoc}/java.base/java/util/NavigableSet.html#tailSet(java.lang.Object,boolean)[tailSet](def, boolean) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-NoSuchElementException]] -==== NoSuchElementException -* {java11-javadoc}/java.base/java/util/NoSuchElementException.html#()[NoSuchElementException]() -* {java11-javadoc}/java.base/java/util/NoSuchElementException.html#(java.lang.String)[NoSuchElementException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Objects]] -==== Objects -* static int {java11-javadoc}/java.base/java/util/Objects.html#compare(java.lang.Object,java.lang.Object,java.util.Comparator)[compare](def, def, Comparator) -* static boolean {java11-javadoc}/java.base/java/util/Objects.html#deepEquals(java.lang.Object,java.lang.Object)[deepEquals](Object, Object) -* static boolean {java11-javadoc}/java.base/java/util/Objects.html#equals(java.lang.Object,java.lang.Object)[equals](Object, Object) -* static int {java11-javadoc}/java.base/java/util/Objects.html#hash(java.lang.Object%5B%5D)[hash](Object[]) -* static int {java11-javadoc}/java.base/java/util/Objects.html#hashCode(java.lang.Object)[hashCode](Object) -* static boolean {java11-javadoc}/java.base/java/util/Objects.html#isNull(java.lang.Object)[isNull](Object) -* static boolean {java11-javadoc}/java.base/java/util/Objects.html#nonNull(java.lang.Object)[nonNull](Object) -* static def {java11-javadoc}/java.base/java/util/Objects.html#requireNonNull(java.lang.Object)[requireNonNull](def) -* static def {java11-javadoc}/java.base/java/util/Objects.html#requireNonNull(java.lang.Object,java.lang.String)[requireNonNull](def, null) -* static null {java11-javadoc}/java.base/java/util/Objects.html#toString(java.lang.Object)[toString](Object) -* static null {java11-javadoc}/java.base/java/util/Objects.html#toString(java.lang.Object,java.lang.String)[toString](Object, null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Observable]] -==== Observable -* {java11-javadoc}/java.base/java/util/Observable.html#()[Observable]() -* void {java11-javadoc}/java.base/java/util/Observable.html#addObserver(java.util.Observer)[addObserver](Observer) -* int {java11-javadoc}/java.base/java/util/Observable.html#countObservers()[countObservers]() -* void {java11-javadoc}/java.base/java/util/Observable.html#deleteObserver(java.util.Observer)[deleteObserver](Observer) -* void 
{java11-javadoc}/java.base/java/util/Observable.html#deleteObservers()[deleteObservers]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/util/Observable.html#hasChanged()[hasChanged]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* void {java11-javadoc}/java.base/java/util/Observable.html#notifyObservers()[notifyObservers]() -* void {java11-javadoc}/java.base/java/util/Observable.html#notifyObservers(java.lang.Object)[notifyObservers](Object) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Observer]] -==== Observer -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* void {java11-javadoc}/java.base/java/util/Observer.html#update(java.util.Observable,java.lang.Object)[update](Observable, Object) - - -[[painless-api-reference-shared-Optional]] -==== Optional -* static Optional {java11-javadoc}/java.base/java/util/Optional.html#empty()[empty]() -* static Optional {java11-javadoc}/java.base/java/util/Optional.html#of(java.lang.Object)[of](def) -* static Optional {java11-javadoc}/java.base/java/util/Optional.html#ofNullable(java.lang.Object)[ofNullable](def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* Optional {java11-javadoc}/java.base/java/util/Optional.html#filter(java.util.function.Predicate)[filter](Predicate) -* Optional {java11-javadoc}/java.base/java/util/Optional.html#flatMap(java.util.function.Function)[flatMap](Function) -* def {java11-javadoc}/java.base/java/util/Optional.html#get()[get]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* void {java11-javadoc}/java.base/java/util/Optional.html#ifPresent(java.util.function.Consumer)[ifPresent](Consumer) -* boolean {java11-javadoc}/java.base/java/util/Optional.html#isPresent()[isPresent]() -* Optional {java11-javadoc}/java.base/java/util/Optional.html#map(java.util.function.Function)[map](Function) -* def {java11-javadoc}/java.base/java/util/Optional.html#orElse(java.lang.Object)[orElse](def) -* def {java11-javadoc}/java.base/java/util/Optional.html#orElseGet(java.util.function.Supplier)[orElseGet](Supplier) -* def {java11-javadoc}/java.base/java/util/Optional.html#orElseThrow(java.util.function.Supplier)[orElseThrow](Supplier) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-OptionalDouble]] -==== OptionalDouble -* static OptionalDouble {java11-javadoc}/java.base/java/util/OptionalDouble.html#empty()[empty]() -* static OptionalDouble {java11-javadoc}/java.base/java/util/OptionalDouble.html#of(double)[of](double) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* double {java11-javadoc}/java.base/java/util/OptionalDouble.html#getAsDouble()[getAsDouble]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* void {java11-javadoc}/java.base/java/util/OptionalDouble.html#ifPresent(java.util.function.DoubleConsumer)[ifPresent](DoubleConsumer) -* boolean {java11-javadoc}/java.base/java/util/OptionalDouble.html#isPresent()[isPresent]() -* double 
{java11-javadoc}/java.base/java/util/OptionalDouble.html#orElse(double)[orElse](double) -* double {java11-javadoc}/java.base/java/util/OptionalDouble.html#orElseGet(java.util.function.DoubleSupplier)[orElseGet](DoubleSupplier) -* double {java11-javadoc}/java.base/java/util/OptionalDouble.html#orElseThrow(java.util.function.Supplier)[orElseThrow](Supplier) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-OptionalInt]] -==== OptionalInt -* static OptionalInt {java11-javadoc}/java.base/java/util/OptionalInt.html#empty()[empty]() -* static OptionalInt {java11-javadoc}/java.base/java/util/OptionalInt.html#of(int)[of](int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/util/OptionalInt.html#getAsInt()[getAsInt]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* void {java11-javadoc}/java.base/java/util/OptionalInt.html#ifPresent(java.util.function.IntConsumer)[ifPresent](IntConsumer) -* boolean {java11-javadoc}/java.base/java/util/OptionalInt.html#isPresent()[isPresent]() -* int {java11-javadoc}/java.base/java/util/OptionalInt.html#orElse(int)[orElse](int) -* int {java11-javadoc}/java.base/java/util/OptionalInt.html#orElseGet(java.util.function.IntSupplier)[orElseGet](IntSupplier) -* int {java11-javadoc}/java.base/java/util/OptionalInt.html#orElseThrow(java.util.function.Supplier)[orElseThrow](Supplier) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-OptionalLong]] -==== OptionalLong -* static OptionalLong {java11-javadoc}/java.base/java/util/OptionalLong.html#empty()[empty]() -* static OptionalLong {java11-javadoc}/java.base/java/util/OptionalLong.html#of(long)[of](long) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/util/OptionalLong.html#getAsLong()[getAsLong]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* void {java11-javadoc}/java.base/java/util/OptionalLong.html#ifPresent(java.util.function.LongConsumer)[ifPresent](LongConsumer) -* boolean {java11-javadoc}/java.base/java/util/OptionalLong.html#isPresent()[isPresent]() -* long {java11-javadoc}/java.base/java/util/OptionalLong.html#orElse(long)[orElse](long) -* long {java11-javadoc}/java.base/java/util/OptionalLong.html#orElseGet(java.util.function.LongSupplier)[orElseGet](LongSupplier) -* long {java11-javadoc}/java.base/java/util/OptionalLong.html#orElseThrow(java.util.function.Supplier)[orElseThrow](Supplier) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-PrimitiveIterator]] -==== PrimitiveIterator -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* void {java11-javadoc}/java.base/java/util/PrimitiveIterator.html#forEachRemaining(java.lang.Object)[forEachRemaining](def) -* boolean {java11-javadoc}/java.base/java/util/Iterator.html#hasNext()[hasNext]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* def {java11-javadoc}/java.base/java/util/Iterator.html#next()[next]() -* void {java11-javadoc}/java.base/java/util/Iterator.html#remove()[remove]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-PrimitiveIterator-OfDouble]] -==== 
PrimitiveIterator.OfDouble -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* void {java11-javadoc}/java.base/java/util/PrimitiveIterator.html#forEachRemaining(java.lang.Object)[forEachRemaining](def) -* boolean {java11-javadoc}/java.base/java/util/Iterator.html#hasNext()[hasNext]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Double {java11-javadoc}/java.base/java/util/PrimitiveIterator$OfDouble.html#next()[next]() -* double {java11-javadoc}/java.base/java/util/PrimitiveIterator$OfDouble.html#nextDouble()[nextDouble]() -* void {java11-javadoc}/java.base/java/util/Iterator.html#remove()[remove]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-PrimitiveIterator-OfInt]] -==== PrimitiveIterator.OfInt -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* void {java11-javadoc}/java.base/java/util/PrimitiveIterator.html#forEachRemaining(java.lang.Object)[forEachRemaining](def) -* boolean {java11-javadoc}/java.base/java/util/Iterator.html#hasNext()[hasNext]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Integer {java11-javadoc}/java.base/java/util/PrimitiveIterator$OfInt.html#next()[next]() -* int {java11-javadoc}/java.base/java/util/PrimitiveIterator$OfInt.html#nextInt()[nextInt]() -* void {java11-javadoc}/java.base/java/util/Iterator.html#remove()[remove]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-PrimitiveIterator-OfLong]] -==== PrimitiveIterator.OfLong -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* void {java11-javadoc}/java.base/java/util/PrimitiveIterator.html#forEachRemaining(java.lang.Object)[forEachRemaining](def) -* boolean {java11-javadoc}/java.base/java/util/Iterator.html#hasNext()[hasNext]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Long {java11-javadoc}/java.base/java/util/PrimitiveIterator$OfLong.html#next()[next]() -* long {java11-javadoc}/java.base/java/util/PrimitiveIterator$OfLong.html#nextLong()[nextLong]() -* void {java11-javadoc}/java.base/java/util/Iterator.html#remove()[remove]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-PriorityQueue]] -==== PriorityQueue -* {java11-javadoc}/java.base/java/util/PriorityQueue.html#()[PriorityQueue]() -* {java11-javadoc}/java.base/java/util/PriorityQueue.html#(java.util.Comparator)[PriorityQueue](Comparator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* def {java11-javadoc}/java.base/java/util/Queue.html#element()[element]() -* boolean 
{java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* boolean {java11-javadoc}/java.base/java/util/Queue.html#offer(java.lang.Object)[offer](def) -* def {java11-javadoc}/java.base/java/util/Queue.html#peek()[peek]() -* def {java11-javadoc}/java.base/java/util/Queue.html#poll()[poll]() -* def {java11-javadoc}/java.base/java/util/Queue.html#remove()[remove]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Queue]] -==== Queue -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* def {java11-javadoc}/java.base/java/util/Queue.html#element()[element]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* boolean 
{java11-javadoc}/java.base/java/util/Queue.html#offer(java.lang.Object)[offer](def) -* def {java11-javadoc}/java.base/java/util/Queue.html#peek()[peek]() -* def {java11-javadoc}/java.base/java/util/Queue.html#poll()[poll]() -* def {java11-javadoc}/java.base/java/util/Queue.html#remove()[remove]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Random]] -==== Random -* {java11-javadoc}/java.base/java/util/Random.html#()[Random]() -* {java11-javadoc}/java.base/java/util/Random.html#(long)[Random](long) -* DoubleStream {java11-javadoc}/java.base/java/util/Random.html#doubles(long)[doubles](long) -* DoubleStream {java11-javadoc}/java.base/java/util/Random.html#doubles(long,double,double)[doubles](long, double, double) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* IntStream {java11-javadoc}/java.base/java/util/Random.html#ints(long)[ints](long) -* IntStream {java11-javadoc}/java.base/java/util/Random.html#ints(long,int,int)[ints](long, int, int) -* LongStream {java11-javadoc}/java.base/java/util/Random.html#longs(long)[longs](long) -* LongStream {java11-javadoc}/java.base/java/util/Random.html#longs(long,long,long)[longs](long, long, long) -* boolean {java11-javadoc}/java.base/java/util/Random.html#nextBoolean()[nextBoolean]() -* void {java11-javadoc}/java.base/java/util/Random.html#nextBytes(byte%5B%5D)[nextBytes](byte[]) -* double {java11-javadoc}/java.base/java/util/Random.html#nextDouble()[nextDouble]() -* float {java11-javadoc}/java.base/java/util/Random.html#nextFloat()[nextFloat]() -* double {java11-javadoc}/java.base/java/util/Random.html#nextGaussian()[nextGaussian]() -* int {java11-javadoc}/java.base/java/util/Random.html#nextInt()[nextInt]() -* int {java11-javadoc}/java.base/java/util/Random.html#nextInt(int)[nextInt](int) -* long {java11-javadoc}/java.base/java/util/Random.html#nextLong()[nextLong]() -* void {java11-javadoc}/java.base/java/util/Random.html#setSeed(long)[setSeed](long) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-RandomAccess]] -==== RandomAccess -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Set]] -==== Set -* boolean 
{java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/Set.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/Set.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* boolean {java11-javadoc}/java.base/java/util/Set.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-SimpleTimeZone]] -==== SimpleTimeZone -* static int {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#STANDARD_TIME[STANDARD_TIME] -* static int {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#UTC_TIME[UTC_TIME] -* static int {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#WALL_TIME[WALL_TIME] -* {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#(int,java.lang.String,int,int,int,int,int,int,int,int)[SimpleTimeZone](int, null, int, int, int, int, int, int, int, int) -* {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#(int,java.lang.String,int,int,int,int,int,int,int,int,int)[SimpleTimeZone](int, null, int, int, int, int, int, int, int, int, int) -* {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#(int,java.lang.String,int,int,int,int,int,int,int,int,int,int,int)[SimpleTimeZone](int, null, int, int, int, int, int, int, int, int, int, int, int) -* {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#(int,java.lang.String)[SimpleTimeZone](int, null) -* def {java11-javadoc}/java.base/java/util/TimeZone.html#clone()[clone]() 
-* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#getDSTSavings()[getDSTSavings]() -* null {java11-javadoc}/java.base/java/util/TimeZone.html#getDisplayName()[getDisplayName]() -* null {java11-javadoc}/java.base/java/util/TimeZone.html#getDisplayName(java.util.Locale)[getDisplayName](Locale) -* null {java11-javadoc}/java.base/java/util/TimeZone.html#getDisplayName(boolean,int)[getDisplayName](boolean, int) -* null {java11-javadoc}/java.base/java/util/TimeZone.html#getDisplayName(boolean,int,java.util.Locale)[getDisplayName](boolean, int, Locale) -* null {java11-javadoc}/java.base/java/util/TimeZone.html#getID()[getID]() -* int {java11-javadoc}/java.base/java/util/TimeZone.html#getOffset(long)[getOffset](long) -* int {java11-javadoc}/java.base/java/util/TimeZone.html#getOffset(int,int,int,int,int,int)[getOffset](int, int, int, int, int, int) -* int {java11-javadoc}/java.base/java/util/TimeZone.html#getRawOffset()[getRawOffset]() -* boolean {java11-javadoc}/java.base/java/util/TimeZone.html#hasSameRules(java.util.TimeZone)[hasSameRules](TimeZone) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/TimeZone.html#inDaylightTime(java.util.Date)[inDaylightTime](Date) -* boolean {java11-javadoc}/java.base/java/util/TimeZone.html#observesDaylightTime()[observesDaylightTime]() -* void {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#setDSTSavings(int)[setDSTSavings](int) -* void {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#setEndRule(int,int,int)[setEndRule](int, int, int) -* void {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#setEndRule(int,int,int,int)[setEndRule](int, int, int, int) -* void {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#setEndRule(int,int,int,int,boolean)[setEndRule](int, int, int, int, boolean) -* void {java11-javadoc}/java.base/java/util/TimeZone.html#setRawOffset(int)[setRawOffset](int) -* void {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#setStartRule(int,int,int)[setStartRule](int, int, int) -* void {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#setStartRule(int,int,int,int)[setStartRule](int, int, int, int) -* void {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#setStartRule(int,int,int,int,boolean)[setStartRule](int, int, int, int, boolean) -* void {java11-javadoc}/java.base/java/util/SimpleTimeZone.html#setStartYear(int)[setStartYear](int) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* ZoneId {java11-javadoc}/java.base/java/util/TimeZone.html#toZoneId()[toZoneId]() -* boolean {java11-javadoc}/java.base/java/util/TimeZone.html#useDaylightTime()[useDaylightTime]() - - -[[painless-api-reference-shared-SortedMap]] -==== SortedMap -* void {java11-javadoc}/java.base/java/util/Map.html#clear()[clear]() -* List collect(BiFunction) -* def collect(Collection, BiFunction) -* Comparator {java11-javadoc}/java.base/java/util/SortedMap.html#comparator()[comparator]() -* def {java11-javadoc}/java.base/java/util/Map.html#compute(java.lang.Object,java.util.function.BiFunction)[compute](def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfAbsent(java.lang.Object,java.util.function.Function)[computeIfAbsent](def, Function) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfPresent(java.lang.Object,java.util.function.BiFunction)[computeIfPresent](def, BiFunction) 
-* boolean {java11-javadoc}/java.base/java/util/Map.html#containsKey(java.lang.Object)[containsKey](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsValue(java.lang.Object)[containsValue](def) -* int count(BiPredicate) -* def each(BiConsumer) -* Set {java11-javadoc}/java.base/java/util/Map.html#entrySet()[entrySet]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#equals(java.lang.Object)[equals](Object) -* boolean every(BiPredicate) -* Map.Entry find(BiPredicate) -* Map findAll(BiPredicate) -* def findResult(BiFunction) -* def findResult(def, BiFunction) -* List findResults(BiFunction) -* def {java11-javadoc}/java.base/java/util/SortedMap.html#firstKey()[firstKey]() -* void {java11-javadoc}/java.base/java/util/Map.html#forEach(java.util.function.BiConsumer)[forEach](BiConsumer) -* def {java11-javadoc}/java.base/java/util/Map.html#get(java.lang.Object)[get](def) -* Object getByPath(null) -* Object getByPath(null, Object) -* def {java11-javadoc}/java.base/java/util/Map.html#getOrDefault(java.lang.Object,java.lang.Object)[getOrDefault](def, def) -* Map groupBy(BiFunction) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* SortedMap {java11-javadoc}/java.base/java/util/SortedMap.html#headMap(java.lang.Object)[headMap](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#isEmpty()[isEmpty]() -* Set {java11-javadoc}/java.base/java/util/Map.html#keySet()[keySet]() -* def {java11-javadoc}/java.base/java/util/SortedMap.html#lastKey()[lastKey]() -* def {java11-javadoc}/java.base/java/util/Map.html#merge(java.lang.Object,java.lang.Object,java.util.function.BiFunction)[merge](def, def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#put(java.lang.Object,java.lang.Object)[put](def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#putAll(java.util.Map)[putAll](Map) -* def {java11-javadoc}/java.base/java/util/Map.html#putIfAbsent(java.lang.Object,java.lang.Object)[putIfAbsent](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object,java.lang.Object)[remove](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object)[replace](def, def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object,java.lang.Object)[replace](def, def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#replaceAll(java.util.function.BiFunction)[replaceAll](BiFunction) -* int {java11-javadoc}/java.base/java/util/Map.html#size()[size]() -* SortedMap {java11-javadoc}/java.base/java/util/SortedMap.html#subMap(java.lang.Object,java.lang.Object)[subMap](def, def) -* SortedMap {java11-javadoc}/java.base/java/util/SortedMap.html#tailMap(java.lang.Object)[tailMap](def) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Collection {java11-javadoc}/java.base/java/util/Map.html#values()[values]() - - -[[painless-api-reference-shared-SortedSet]] -==== SortedSet -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* Comparator 
{java11-javadoc}/java.base/java/util/SortedSet.html#comparator()[comparator]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/Set.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* def {java11-javadoc}/java.base/java/util/SortedSet.html#first()[first]() -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/Set.html#hashCode()[hashCode]() -* SortedSet {java11-javadoc}/java.base/java/util/SortedSet.html#headSet(java.lang.Object)[headSet](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* def {java11-javadoc}/java.base/java/util/SortedSet.html#last()[last]() -* boolean {java11-javadoc}/java.base/java/util/Set.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* SortedSet {java11-javadoc}/java.base/java/util/SortedSet.html#subSet(java.lang.Object,java.lang.Object)[subSet](def, def) -* double sum() -* double sum(ToDoubleFunction) -* SortedSet {java11-javadoc}/java.base/java/util/SortedSet.html#tailSet(java.lang.Object)[tailSet](def) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Spliterator]] -==== Spliterator -* static int {java11-javadoc}/java.base/java/util/Spliterator.html#CONCURRENT[CONCURRENT] -* static int {java11-javadoc}/java.base/java/util/Spliterator.html#DISTINCT[DISTINCT] -* static int {java11-javadoc}/java.base/java/util/Spliterator.html#IMMUTABLE[IMMUTABLE] -* static int {java11-javadoc}/java.base/java/util/Spliterator.html#NONNULL[NONNULL] -* static int {java11-javadoc}/java.base/java/util/Spliterator.html#ORDERED[ORDERED] -* static int {java11-javadoc}/java.base/java/util/Spliterator.html#SIZED[SIZED] -* static int {java11-javadoc}/java.base/java/util/Spliterator.html#SORTED[SORTED] -* static int {java11-javadoc}/java.base/java/util/Spliterator.html#SUBSIZED[SUBSIZED] -* int {java11-javadoc}/java.base/java/util/Spliterator.html#characteristics()[characteristics]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long 
{java11-javadoc}/java.base/java/util/Spliterator.html#estimateSize()[estimateSize]() -* void {java11-javadoc}/java.base/java/util/Spliterator.html#forEachRemaining(java.util.function.Consumer)[forEachRemaining](Consumer) -* Comparator {java11-javadoc}/java.base/java/util/Spliterator.html#getComparator()[getComparator]() -* long {java11-javadoc}/java.base/java/util/Spliterator.html#getExactSizeIfKnown()[getExactSizeIfKnown]() -* boolean {java11-javadoc}/java.base/java/util/Spliterator.html#hasCharacteristics(int)[hasCharacteristics](int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* boolean {java11-javadoc}/java.base/java/util/Spliterator.html#tryAdvance(java.util.function.Consumer)[tryAdvance](Consumer) -* Spliterator {java11-javadoc}/java.base/java/util/Spliterator.html#trySplit()[trySplit]() - - -[[painless-api-reference-shared-Spliterator-OfDouble]] -==== Spliterator.OfDouble -* int {java11-javadoc}/java.base/java/util/Spliterator.html#characteristics()[characteristics]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/util/Spliterator.html#estimateSize()[estimateSize]() -* void {java11-javadoc}/java.base/java/util/Spliterator$OfPrimitive.html#forEachRemaining(java.lang.Object)[forEachRemaining](def) -* Comparator {java11-javadoc}/java.base/java/util/Spliterator.html#getComparator()[getComparator]() -* long {java11-javadoc}/java.base/java/util/Spliterator.html#getExactSizeIfKnown()[getExactSizeIfKnown]() -* boolean {java11-javadoc}/java.base/java/util/Spliterator.html#hasCharacteristics(int)[hasCharacteristics](int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* boolean {java11-javadoc}/java.base/java/util/Spliterator$OfPrimitive.html#tryAdvance(java.lang.Object)[tryAdvance](def) -* Spliterator.OfDouble {java11-javadoc}/java.base/java/util/Spliterator$OfDouble.html#trySplit()[trySplit]() - - -[[painless-api-reference-shared-Spliterator-OfInt]] -==== Spliterator.OfInt -* int {java11-javadoc}/java.base/java/util/Spliterator.html#characteristics()[characteristics]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/util/Spliterator.html#estimateSize()[estimateSize]() -* void {java11-javadoc}/java.base/java/util/Spliterator$OfPrimitive.html#forEachRemaining(java.lang.Object)[forEachRemaining](def) -* Comparator {java11-javadoc}/java.base/java/util/Spliterator.html#getComparator()[getComparator]() -* long {java11-javadoc}/java.base/java/util/Spliterator.html#getExactSizeIfKnown()[getExactSizeIfKnown]() -* boolean {java11-javadoc}/java.base/java/util/Spliterator.html#hasCharacteristics(int)[hasCharacteristics](int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* boolean {java11-javadoc}/java.base/java/util/Spliterator$OfPrimitive.html#tryAdvance(java.lang.Object)[tryAdvance](def) -* Spliterator.OfInt {java11-javadoc}/java.base/java/util/Spliterator$OfInt.html#trySplit()[trySplit]() - - -[[painless-api-reference-shared-Spliterator-OfLong]] -==== Spliterator.OfLong -* int {java11-javadoc}/java.base/java/util/Spliterator.html#characteristics()[characteristics]() -* boolean 
{java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/util/Spliterator.html#estimateSize()[estimateSize]() -* void {java11-javadoc}/java.base/java/util/Spliterator$OfPrimitive.html#forEachRemaining(java.lang.Object)[forEachRemaining](def) -* Comparator {java11-javadoc}/java.base/java/util/Spliterator.html#getComparator()[getComparator]() -* long {java11-javadoc}/java.base/java/util/Spliterator.html#getExactSizeIfKnown()[getExactSizeIfKnown]() -* boolean {java11-javadoc}/java.base/java/util/Spliterator.html#hasCharacteristics(int)[hasCharacteristics](int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* boolean {java11-javadoc}/java.base/java/util/Spliterator$OfPrimitive.html#tryAdvance(java.lang.Object)[tryAdvance](def) -* Spliterator.OfLong {java11-javadoc}/java.base/java/util/Spliterator$OfLong.html#trySplit()[trySplit]() - - -[[painless-api-reference-shared-Spliterator-OfPrimitive]] -==== Spliterator.OfPrimitive -* int {java11-javadoc}/java.base/java/util/Spliterator.html#characteristics()[characteristics]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/util/Spliterator.html#estimateSize()[estimateSize]() -* void {java11-javadoc}/java.base/java/util/Spliterator$OfPrimitive.html#forEachRemaining(java.lang.Object)[forEachRemaining](def) -* Comparator {java11-javadoc}/java.base/java/util/Spliterator.html#getComparator()[getComparator]() -* long {java11-javadoc}/java.base/java/util/Spliterator.html#getExactSizeIfKnown()[getExactSizeIfKnown]() -* boolean {java11-javadoc}/java.base/java/util/Spliterator.html#hasCharacteristics(int)[hasCharacteristics](int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* boolean {java11-javadoc}/java.base/java/util/Spliterator$OfPrimitive.html#tryAdvance(java.lang.Object)[tryAdvance](def) -* Spliterator.OfPrimitive {java11-javadoc}/java.base/java/util/Spliterator$OfPrimitive.html#trySplit()[trySplit]() - - -[[painless-api-reference-shared-Spliterators]] -==== Spliterators -* static Spliterator.OfDouble {java11-javadoc}/java.base/java/util/Spliterators.html#emptyDoubleSpliterator()[emptyDoubleSpliterator]() -* static Spliterator.OfInt {java11-javadoc}/java.base/java/util/Spliterators.html#emptyIntSpliterator()[emptyIntSpliterator]() -* static Spliterator.OfLong {java11-javadoc}/java.base/java/util/Spliterators.html#emptyLongSpliterator()[emptyLongSpliterator]() -* static Spliterator {java11-javadoc}/java.base/java/util/Spliterators.html#emptySpliterator()[emptySpliterator]() -* static Iterator {java11-javadoc}/java.base/java/util/Spliterators.html#iterator(java.util.Spliterator)[iterator](Spliterator) -* static Spliterator {java11-javadoc}/java.base/java/util/Spliterators.html#spliterator(java.util.Collection,int)[spliterator](Collection, int) -* static Spliterator {java11-javadoc}/java.base/java/util/Spliterators.html#spliterator(java.util.Iterator,long,int)[spliterator](Iterator, long, int) -* static Spliterator {java11-javadoc}/java.base/java/util/Spliterators.html#spliteratorUnknownSize(java.util.Iterator,int)[spliteratorUnknownSize](Iterator, int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int 
{java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Stack]] -==== Stack -* {java11-javadoc}/java.base/java/util/Stack.html#()[Stack]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* void {java11-javadoc}/java.base/java/util/Vector.html#addElement(java.lang.Object)[addElement](def) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* def {java11-javadoc}/java.base/java/util/Vector.html#clone()[clone]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* void {java11-javadoc}/java.base/java/util/Vector.html#copyInto(java.lang.Object%5B%5D)[copyInto](Object[]) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* def {java11-javadoc}/java.base/java/util/Vector.html#elementAt(int)[elementAt](int) -* Enumeration {java11-javadoc}/java.base/java/util/Vector.html#elements()[elements]() -* boolean {java11-javadoc}/java.base/java/util/Stack.html#empty()[empty]() -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* def {java11-javadoc}/java.base/java/util/Vector.html#firstElement()[firstElement]() -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* def {java11-javadoc}/java.base/java/util/List.html#get(int)[get](int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* void {java11-javadoc}/java.base/java/util/Vector.html#insertElementAt(java.lang.Object,int)[insertElementAt](def, int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* def {java11-javadoc}/java.base/java/util/Vector.html#lastElement()[lastElement]() -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* int {java11-javadoc}/java.base/java/util/Vector.html#lastIndexOf(java.lang.Object,int)[lastIndexOf](def, int) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/Stack.html#peek()[peek]() -* def {java11-javadoc}/java.base/java/util/Stack.html#pop()[pop]() -* def 
{java11-javadoc}/java.base/java/util/Stack.html#push(java.lang.Object)[push](def) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* void {java11-javadoc}/java.base/java/util/Vector.html#removeAllElements()[removeAllElements]() -* boolean {java11-javadoc}/java.base/java/util/Vector.html#removeElement(java.lang.Object)[removeElement](def) -* void {java11-javadoc}/java.base/java/util/Vector.html#removeElementAt(int)[removeElementAt](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Stack.html#search(java.lang.Object)[search](def) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* void {java11-javadoc}/java.base/java/util/Vector.html#setElementAt(java.lang.Object,int)[setElementAt](def, int) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-StringJoiner]] -==== StringJoiner -* {java11-javadoc}/java.base/java/util/StringJoiner.html#(java.lang.CharSequence)[StringJoiner](CharSequence) -* {java11-javadoc}/java.base/java/util/StringJoiner.html#(java.lang.CharSequence,java.lang.CharSequence,java.lang.CharSequence)[StringJoiner](CharSequence, CharSequence, CharSequence) -* StringJoiner {java11-javadoc}/java.base/java/util/StringJoiner.html#add(java.lang.CharSequence)[add](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/StringJoiner.html#length()[length]() -* StringJoiner {java11-javadoc}/java.base/java/util/StringJoiner.html#merge(java.util.StringJoiner)[merge](StringJoiner) -* StringJoiner {java11-javadoc}/java.base/java/util/StringJoiner.html#setEmptyValue(java.lang.CharSequence)[setEmptyValue](CharSequence) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-StringTokenizer]] -==== StringTokenizer -* {java11-javadoc}/java.base/java/util/StringTokenizer.html#(java.lang.String)[StringTokenizer](null) -* {java11-javadoc}/java.base/java/util/StringTokenizer.html#(java.lang.String,java.lang.String)[StringTokenizer](null, null) -* 
{java11-javadoc}/java.base/java/util/StringTokenizer.html#(java.lang.String,java.lang.String,boolean)[StringTokenizer](null, null, boolean) -* int {java11-javadoc}/java.base/java/util/StringTokenizer.html#countTokens()[countTokens]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/util/Enumeration.html#hasMoreElements()[hasMoreElements]() -* boolean {java11-javadoc}/java.base/java/util/StringTokenizer.html#hasMoreTokens()[hasMoreTokens]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* def {java11-javadoc}/java.base/java/util/Enumeration.html#nextElement()[nextElement]() -* null {java11-javadoc}/java.base/java/util/StringTokenizer.html#nextToken()[nextToken]() -* null {java11-javadoc}/java.base/java/util/StringTokenizer.html#nextToken(java.lang.String)[nextToken](null) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-TimeZone]] -==== TimeZone -* static int {java11-javadoc}/java.base/java/util/TimeZone.html#LONG[LONG] -* static int {java11-javadoc}/java.base/java/util/TimeZone.html#SHORT[SHORT] -* static null[] {java11-javadoc}/java.base/java/util/TimeZone.html#getAvailableIDs()[getAvailableIDs]() -* static null[] {java11-javadoc}/java.base/java/util/TimeZone.html#getAvailableIDs(int)[getAvailableIDs](int) -* static TimeZone {java11-javadoc}/java.base/java/util/TimeZone.html#getDefault()[getDefault]() -* static TimeZone {java11-javadoc}/java.base/java/util/TimeZone.html#getTimeZone(java.lang.String)[getTimeZone](null) -* def {java11-javadoc}/java.base/java/util/TimeZone.html#clone()[clone]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/util/TimeZone.html#getDSTSavings()[getDSTSavings]() -* null {java11-javadoc}/java.base/java/util/TimeZone.html#getDisplayName()[getDisplayName]() -* null {java11-javadoc}/java.base/java/util/TimeZone.html#getDisplayName(java.util.Locale)[getDisplayName](Locale) -* null {java11-javadoc}/java.base/java/util/TimeZone.html#getDisplayName(boolean,int)[getDisplayName](boolean, int) -* null {java11-javadoc}/java.base/java/util/TimeZone.html#getDisplayName(boolean,int,java.util.Locale)[getDisplayName](boolean, int, Locale) -* null {java11-javadoc}/java.base/java/util/TimeZone.html#getID()[getID]() -* int {java11-javadoc}/java.base/java/util/TimeZone.html#getOffset(long)[getOffset](long) -* int {java11-javadoc}/java.base/java/util/TimeZone.html#getOffset(int,int,int,int,int,int)[getOffset](int, int, int, int, int, int) -* int {java11-javadoc}/java.base/java/util/TimeZone.html#getRawOffset()[getRawOffset]() -* boolean {java11-javadoc}/java.base/java/util/TimeZone.html#hasSameRules(java.util.TimeZone)[hasSameRules](TimeZone) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/TimeZone.html#inDaylightTime(java.util.Date)[inDaylightTime](Date) -* boolean {java11-javadoc}/java.base/java/util/TimeZone.html#observesDaylightTime()[observesDaylightTime]() -* void {java11-javadoc}/java.base/java/util/TimeZone.html#setRawOffset(int)[setRawOffset](int) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* ZoneId {java11-javadoc}/java.base/java/util/TimeZone.html#toZoneId()[toZoneId]() -* boolean {java11-javadoc}/java.base/java/util/TimeZone.html#useDaylightTime()[useDaylightTime]() - - 
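As a minimal, illustrative Painless sketch (not taken from the generated reference above; the zone ID string is an arbitrary example), the allowlisted `TimeZone` methods listed here might be exercised like this:

[source,painless]
----
// Illustrative only: resolve a zone and inspect it with the allowlisted methods above.
TimeZone tz = TimeZone.getTimeZone('America/Los_Angeles'); // static factory
ZoneId zone = tz.toZoneId();                               // bridge into java.time
int rawOffsetMillis = tz.getRawOffset();                   // offset from UTC, in milliseconds
boolean observesDst = tz.useDaylightTime();                // whether the zone has DST rules
----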
-[[painless-api-reference-shared-TooManyListenersException]] -==== TooManyListenersException -* {java11-javadoc}/java.base/java/util/TooManyListenersException.html#()[TooManyListenersException]() -* {java11-javadoc}/java.base/java/util/TooManyListenersException.html#(java.lang.String)[TooManyListenersException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-TreeMap]] -==== TreeMap -* {java11-javadoc}/java.base/java/util/TreeMap.html#()[TreeMap]() -* {java11-javadoc}/java.base/java/util/TreeMap.html#(java.util.Comparator)[TreeMap](Comparator) -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#ceilingEntry(java.lang.Object)[ceilingEntry](def) -* def {java11-javadoc}/java.base/java/util/NavigableMap.html#ceilingKey(java.lang.Object)[ceilingKey](def) -* void {java11-javadoc}/java.base/java/util/Map.html#clear()[clear]() -* def {java11-javadoc}/java.base/java/util/TreeMap.html#clone()[clone]() -* List collect(BiFunction) -* def collect(Collection, BiFunction) -* Comparator {java11-javadoc}/java.base/java/util/SortedMap.html#comparator()[comparator]() -* def {java11-javadoc}/java.base/java/util/Map.html#compute(java.lang.Object,java.util.function.BiFunction)[compute](def, BiFunction) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfAbsent(java.lang.Object,java.util.function.Function)[computeIfAbsent](def, Function) -* def {java11-javadoc}/java.base/java/util/Map.html#computeIfPresent(java.lang.Object,java.util.function.BiFunction)[computeIfPresent](def, BiFunction) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsKey(java.lang.Object)[containsKey](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#containsValue(java.lang.Object)[containsValue](def) -* int count(BiPredicate) -* NavigableSet {java11-javadoc}/java.base/java/util/NavigableMap.html#descendingKeySet()[descendingKeySet]() -* NavigableMap {java11-javadoc}/java.base/java/util/NavigableMap.html#descendingMap()[descendingMap]() -* def each(BiConsumer) -* Set {java11-javadoc}/java.base/java/util/Map.html#entrySet()[entrySet]() -* boolean {java11-javadoc}/java.base/java/util/Map.html#equals(java.lang.Object)[equals](Object) -* boolean every(BiPredicate) -* Map.Entry find(BiPredicate) -* Map findAll(BiPredicate) -* def findResult(BiFunction) -* def findResult(def, BiFunction) -* List findResults(BiFunction) -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#firstEntry()[firstEntry]() -* def {java11-javadoc}/java.base/java/util/SortedMap.html#firstKey()[firstKey]() -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#floorEntry(java.lang.Object)[floorEntry](def) -* def {java11-javadoc}/java.base/java/util/NavigableMap.html#floorKey(java.lang.Object)[floorKey](def) -* void {java11-javadoc}/java.base/java/util/Map.html#forEach(java.util.function.BiConsumer)[forEach](BiConsumer) -* def {java11-javadoc}/java.base/java/util/Map.html#get(java.lang.Object)[get](def) -* Object getByPath(null) -* Object getByPath(null, 
Object) -* def {java11-javadoc}/java.base/java/util/Map.html#getOrDefault(java.lang.Object,java.lang.Object)[getOrDefault](def, def) -* Map groupBy(BiFunction) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* SortedMap {java11-javadoc}/java.base/java/util/SortedMap.html#headMap(java.lang.Object)[headMap](def) -* NavigableMap {java11-javadoc}/java.base/java/util/NavigableMap.html#headMap(java.lang.Object,boolean)[headMap](def, boolean) -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#higherEntry(java.lang.Object)[higherEntry](def) -* def {java11-javadoc}/java.base/java/util/NavigableMap.html#higherKey(java.lang.Object)[higherKey](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#isEmpty()[isEmpty]() -* Set {java11-javadoc}/java.base/java/util/Map.html#keySet()[keySet]() -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#lastEntry()[lastEntry]() -* def {java11-javadoc}/java.base/java/util/SortedMap.html#lastKey()[lastKey]() -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#lowerEntry(java.lang.Object)[lowerEntry](def) -* def {java11-javadoc}/java.base/java/util/Map.html#merge(java.lang.Object,java.lang.Object,java.util.function.BiFunction)[merge](def, def, BiFunction) -* NavigableSet {java11-javadoc}/java.base/java/util/NavigableMap.html#navigableKeySet()[navigableKeySet]() -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#pollFirstEntry()[pollFirstEntry]() -* Map.Entry {java11-javadoc}/java.base/java/util/NavigableMap.html#pollLastEntry()[pollLastEntry]() -* def {java11-javadoc}/java.base/java/util/Map.html#put(java.lang.Object,java.lang.Object)[put](def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#putAll(java.util.Map)[putAll](Map) -* def {java11-javadoc}/java.base/java/util/Map.html#putIfAbsent(java.lang.Object,java.lang.Object)[putIfAbsent](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#remove(java.lang.Object,java.lang.Object)[remove](def, def) -* def {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object)[replace](def, def) -* boolean {java11-javadoc}/java.base/java/util/Map.html#replace(java.lang.Object,java.lang.Object,java.lang.Object)[replace](def, def, def) -* void {java11-javadoc}/java.base/java/util/Map.html#replaceAll(java.util.function.BiFunction)[replaceAll](BiFunction) -* int {java11-javadoc}/java.base/java/util/Map.html#size()[size]() -* SortedMap {java11-javadoc}/java.base/java/util/SortedMap.html#subMap(java.lang.Object,java.lang.Object)[subMap](def, def) -* NavigableMap {java11-javadoc}/java.base/java/util/NavigableMap.html#subMap(java.lang.Object,boolean,java.lang.Object,boolean)[subMap](def, boolean, def, boolean) -* SortedMap {java11-javadoc}/java.base/java/util/SortedMap.html#tailMap(java.lang.Object)[tailMap](def) -* NavigableMap {java11-javadoc}/java.base/java/util/NavigableMap.html#tailMap(java.lang.Object,boolean)[tailMap](def, boolean) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Collection {java11-javadoc}/java.base/java/util/Map.html#values()[values]() - - -[[painless-api-reference-shared-TreeSet]] -==== TreeSet -* {java11-javadoc}/java.base/java/util/TreeSet.html#()[TreeSet]() -* {java11-javadoc}/java.base/java/util/TreeSet.html#(java.util.Comparator)[TreeSet](Comparator) -* boolean 
{java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* def {java11-javadoc}/java.base/java/util/NavigableSet.html#ceiling(java.lang.Object)[ceiling](def) -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* def {java11-javadoc}/java.base/java/util/TreeSet.html#clone()[clone]() -* List collect(Function) -* def collect(Collection, Function) -* Comparator {java11-javadoc}/java.base/java/util/SortedSet.html#comparator()[comparator]() -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* Iterator {java11-javadoc}/java.base/java/util/NavigableSet.html#descendingIterator()[descendingIterator]() -* NavigableSet {java11-javadoc}/java.base/java/util/NavigableSet.html#descendingSet()[descendingSet]() -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/Set.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* def {java11-javadoc}/java.base/java/util/SortedSet.html#first()[first]() -* def {java11-javadoc}/java.base/java/util/NavigableSet.html#floor(java.lang.Object)[floor](def) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/Set.html#hashCode()[hashCode]() -* SortedSet {java11-javadoc}/java.base/java/util/SortedSet.html#headSet(java.lang.Object)[headSet](def) -* NavigableSet {java11-javadoc}/java.base/java/util/NavigableSet.html#headSet(java.lang.Object,boolean)[headSet](def, boolean) -* def {java11-javadoc}/java.base/java/util/NavigableSet.html#higher(java.lang.Object)[higher](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* def {java11-javadoc}/java.base/java/util/SortedSet.html#last()[last]() -* def {java11-javadoc}/java.base/java/util/NavigableSet.html#lower(java.lang.Object)[lower](def) -* def {java11-javadoc}/java.base/java/util/NavigableSet.html#pollFirst()[pollFirst]() -* def {java11-javadoc}/java.base/java/util/NavigableSet.html#pollLast()[pollLast]() -* boolean {java11-javadoc}/java.base/java/util/Set.html#remove(java.lang.Object)[remove](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* SortedSet 
{java11-javadoc}/java.base/java/util/SortedSet.html#subSet(java.lang.Object,java.lang.Object)[subSet](def, def) -* NavigableSet {java11-javadoc}/java.base/java/util/NavigableSet.html#subSet(java.lang.Object,boolean,java.lang.Object,boolean)[subSet](def, boolean, def, boolean) -* double sum() -* double sum(ToDoubleFunction) -* SortedSet {java11-javadoc}/java.base/java/util/SortedSet.html#tailSet(java.lang.Object)[tailSet](def) -* NavigableSet {java11-javadoc}/java.base/java/util/NavigableSet.html#tailSet(java.lang.Object,boolean)[tailSet](def, boolean) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-UUID]] -==== UUID -* static UUID {java11-javadoc}/java.base/java/util/UUID.html#fromString(java.lang.String)[fromString](null) -* static UUID {java11-javadoc}/java.base/java/util/UUID.html#nameUUIDFromBytes(byte%5B%5D)[nameUUIDFromBytes](byte[]) -* static UUID {java11-javadoc}/java.base/java/util/UUID.html#randomUUID()[randomUUID]() -* {java11-javadoc}/java.base/java/util/UUID.html#(long,long)[UUID](long, long) -* int {java11-javadoc}/java.base/java/util/UUID.html#clockSequence()[clockSequence]() -* int {java11-javadoc}/java.base/java/util/UUID.html#compareTo(java.util.UUID)[compareTo](UUID) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/util/UUID.html#getLeastSignificantBits()[getLeastSignificantBits]() -* long {java11-javadoc}/java.base/java/util/UUID.html#getMostSignificantBits()[getMostSignificantBits]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* long {java11-javadoc}/java.base/java/util/UUID.html#node()[node]() -* long {java11-javadoc}/java.base/java/util/UUID.html#timestamp()[timestamp]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* int {java11-javadoc}/java.base/java/util/UUID.html#variant()[variant]() -* int {java11-javadoc}/java.base/java/util/UUID.html#version()[version]() - - -[[painless-api-reference-shared-UnknownFormatConversionException]] -==== UnknownFormatConversionException -* {java11-javadoc}/java.base/java/util/UnknownFormatConversionException.html#(java.lang.String)[UnknownFormatConversionException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/util/UnknownFormatConversionException.html#getConversion()[getConversion]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-UnknownFormatFlagsException]] -==== UnknownFormatFlagsException -* {java11-javadoc}/java.base/java/util/UnknownFormatFlagsException.html#(java.lang.String)[UnknownFormatFlagsException](null) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* null 
{java11-javadoc}/java.base/java/util/UnknownFormatFlagsException.html#getFlags()[getFlags]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getLocalizedMessage()[getLocalizedMessage]() -* null {java11-javadoc}/java.base/java/lang/Throwable.html#getMessage()[getMessage]() -* StackTraceElement[] {java11-javadoc}/java.base/java/lang/Throwable.html#getStackTrace()[getStackTrace]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Vector]] -==== Vector -* {java11-javadoc}/java.base/java/util/Vector.html#()[Vector]() -* {java11-javadoc}/java.base/java/util/Vector.html#(java.util.Collection)[Vector](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* void {java11-javadoc}/java.base/java/util/Vector.html#addElement(java.lang.Object)[addElement](def) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* def {java11-javadoc}/java.base/java/util/Vector.html#clone()[clone]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* void {java11-javadoc}/java.base/java/util/Vector.html#copyInto(java.lang.Object%5B%5D)[copyInto](Object[]) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* def {java11-javadoc}/java.base/java/util/Vector.html#elementAt(int)[elementAt](int) -* Enumeration {java11-javadoc}/java.base/java/util/Vector.html#elements()[elements]() -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* def {java11-javadoc}/java.base/java/util/Vector.html#firstElement()[firstElement]() -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* def {java11-javadoc}/java.base/java/util/List.html#get(int)[get](int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* void {java11-javadoc}/java.base/java/util/Vector.html#insertElementAt(java.lang.Object,int)[insertElementAt](def, int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* def {java11-javadoc}/java.base/java/util/Vector.html#lastElement()[lastElement]() -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* int 
{java11-javadoc}/java.base/java/util/Vector.html#lastIndexOf(java.lang.Object,int)[lastIndexOf](def, int) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* void {java11-javadoc}/java.base/java/util/Vector.html#removeAllElements()[removeAllElements]() -* boolean {java11-javadoc}/java.base/java/util/Vector.html#removeElement(java.lang.Object)[removeElement](def) -* void {java11-javadoc}/java.base/java/util/Vector.html#removeElementAt(int)[removeElementAt](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* void {java11-javadoc}/java.base/java/util/Vector.html#setElementAt(java.lang.Object,int)[setElementAt](def, int) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-java-util-function"] -=== Shared API for package java.util.function -See the <> for a high-level overview of all packages and classes. 
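As a minimal, illustrative Painless sketch (assumptions: a local `values` list and the allowlisted `Collectors` helper), the functional interfaces in this package are normally supplied as lambdas rather than constructed directly:

[source,painless]
----
// Illustrative only: lambdas are converted to these functional interfaces by target type.
List values = [1, 2, 3, 4];
List doubledEvens = values.stream()
    .filter(v -> v % 2 == 0)     // becomes a java.util.function.Predicate
    .map(v -> v * 2)             // becomes a java.util.function.Function
    .collect(Collectors.toList());
----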
- -[[painless-api-reference-shared-BiConsumer]] -==== BiConsumer -* void {java11-javadoc}/java.base/java/util/function/BiConsumer.html#accept(java.lang.Object,java.lang.Object)[accept](def, def) -* BiConsumer {java11-javadoc}/java.base/java/util/function/BiConsumer.html#andThen(java.util.function.BiConsumer)[andThen](BiConsumer) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-BiFunction]] -==== BiFunction -* BiFunction {java11-javadoc}/java.base/java/util/function/BiFunction.html#andThen(java.util.function.Function)[andThen](Function) -* def {java11-javadoc}/java.base/java/util/function/BiFunction.html#apply(java.lang.Object,java.lang.Object)[apply](def, def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-BiPredicate]] -==== BiPredicate -* BiPredicate {java11-javadoc}/java.base/java/util/function/BiPredicate.html#and(java.util.function.BiPredicate)[and](BiPredicate) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* BiPredicate {java11-javadoc}/java.base/java/util/function/BiPredicate.html#negate()[negate]() -* BiPredicate {java11-javadoc}/java.base/java/util/function/BiPredicate.html#or(java.util.function.BiPredicate)[or](BiPredicate) -* boolean {java11-javadoc}/java.base/java/util/function/BiPredicate.html#test(java.lang.Object,java.lang.Object)[test](def, def) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-BinaryOperator]] -==== BinaryOperator -* static BinaryOperator {java11-javadoc}/java.base/java/util/function/BinaryOperator.html#maxBy(java.util.Comparator)[maxBy](Comparator) -* static BinaryOperator {java11-javadoc}/java.base/java/util/function/BinaryOperator.html#minBy(java.util.Comparator)[minBy](Comparator) -* BiFunction {java11-javadoc}/java.base/java/util/function/BiFunction.html#andThen(java.util.function.Function)[andThen](Function) -* def {java11-javadoc}/java.base/java/util/function/BiFunction.html#apply(java.lang.Object,java.lang.Object)[apply](def, def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-BooleanSupplier]] -==== BooleanSupplier -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/util/function/BooleanSupplier.html#getAsBoolean()[getAsBoolean]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Consumer]] -==== Consumer -* void {java11-javadoc}/java.base/java/util/function/Consumer.html#accept(java.lang.Object)[accept](def) -* Consumer 
{java11-javadoc}/java.base/java/util/function/Consumer.html#andThen(java.util.function.Consumer)[andThen](Consumer) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DoubleBinaryOperator]] -==== DoubleBinaryOperator -* double {java11-javadoc}/java.base/java/util/function/DoubleBinaryOperator.html#applyAsDouble(double,double)[applyAsDouble](double, double) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DoubleConsumer]] -==== DoubleConsumer -* void {java11-javadoc}/java.base/java/util/function/DoubleConsumer.html#accept(double)[accept](double) -* DoubleConsumer {java11-javadoc}/java.base/java/util/function/DoubleConsumer.html#andThen(java.util.function.DoubleConsumer)[andThen](DoubleConsumer) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DoubleFunction]] -==== DoubleFunction -* def {java11-javadoc}/java.base/java/util/function/DoubleFunction.html#apply(double)[apply](double) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DoublePredicate]] -==== DoublePredicate -* DoublePredicate {java11-javadoc}/java.base/java/util/function/DoublePredicate.html#and(java.util.function.DoublePredicate)[and](DoublePredicate) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* DoublePredicate {java11-javadoc}/java.base/java/util/function/DoublePredicate.html#negate()[negate]() -* DoublePredicate {java11-javadoc}/java.base/java/util/function/DoublePredicate.html#or(java.util.function.DoublePredicate)[or](DoublePredicate) -* boolean {java11-javadoc}/java.base/java/util/function/DoublePredicate.html#test(double)[test](double) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DoubleSupplier]] -==== DoubleSupplier -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* double {java11-javadoc}/java.base/java/util/function/DoubleSupplier.html#getAsDouble()[getAsDouble]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DoubleToIntFunction]] -==== DoubleToIntFunction -* int {java11-javadoc}/java.base/java/util/function/DoubleToIntFunction.html#applyAsInt(double)[applyAsInt](double) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int 
{java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DoubleToLongFunction]] -==== DoubleToLongFunction -* long {java11-javadoc}/java.base/java/util/function/DoubleToLongFunction.html#applyAsLong(double)[applyAsLong](double) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DoubleUnaryOperator]] -==== DoubleUnaryOperator -* static DoubleUnaryOperator {java11-javadoc}/java.base/java/util/function/DoubleUnaryOperator.html#identity()[identity]() -* DoubleUnaryOperator {java11-javadoc}/java.base/java/util/function/DoubleUnaryOperator.html#andThen(java.util.function.DoubleUnaryOperator)[andThen](DoubleUnaryOperator) -* double {java11-javadoc}/java.base/java/util/function/DoubleUnaryOperator.html#applyAsDouble(double)[applyAsDouble](double) -* DoubleUnaryOperator {java11-javadoc}/java.base/java/util/function/DoubleUnaryOperator.html#compose(java.util.function.DoubleUnaryOperator)[compose](DoubleUnaryOperator) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Function]] -==== Function -* static Function {java11-javadoc}/java.base/java/util/function/Function.html#identity()[identity]() -* Function {java11-javadoc}/java.base/java/util/function/Function.html#andThen(java.util.function.Function)[andThen](Function) -* def {java11-javadoc}/java.base/java/util/function/Function.html#apply(java.lang.Object)[apply](def) -* Function {java11-javadoc}/java.base/java/util/function/Function.html#compose(java.util.function.Function)[compose](Function) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IntBinaryOperator]] -==== IntBinaryOperator -* int {java11-javadoc}/java.base/java/util/function/IntBinaryOperator.html#applyAsInt(int,int)[applyAsInt](int, int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IntConsumer]] -==== IntConsumer -* void {java11-javadoc}/java.base/java/util/function/IntConsumer.html#accept(int)[accept](int) -* IntConsumer {java11-javadoc}/java.base/java/util/function/IntConsumer.html#andThen(java.util.function.IntConsumer)[andThen](IntConsumer) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IntFunction]] -==== IntFunction -* def {java11-javadoc}/java.base/java/util/function/IntFunction.html#apply(int)[apply](int) -* boolean 
{java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IntPredicate]] -==== IntPredicate -* IntPredicate {java11-javadoc}/java.base/java/util/function/IntPredicate.html#and(java.util.function.IntPredicate)[and](IntPredicate) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* IntPredicate {java11-javadoc}/java.base/java/util/function/IntPredicate.html#negate()[negate]() -* IntPredicate {java11-javadoc}/java.base/java/util/function/IntPredicate.html#or(java.util.function.IntPredicate)[or](IntPredicate) -* boolean {java11-javadoc}/java.base/java/util/function/IntPredicate.html#test(int)[test](int) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IntSupplier]] -==== IntSupplier -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/util/function/IntSupplier.html#getAsInt()[getAsInt]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IntToDoubleFunction]] -==== IntToDoubleFunction -* double {java11-javadoc}/java.base/java/util/function/IntToDoubleFunction.html#applyAsDouble(int)[applyAsDouble](int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IntToLongFunction]] -==== IntToLongFunction -* long {java11-javadoc}/java.base/java/util/function/IntToLongFunction.html#applyAsLong(int)[applyAsLong](int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IntUnaryOperator]] -==== IntUnaryOperator -* static IntUnaryOperator {java11-javadoc}/java.base/java/util/function/IntUnaryOperator.html#identity()[identity]() -* IntUnaryOperator {java11-javadoc}/java.base/java/util/function/IntUnaryOperator.html#andThen(java.util.function.IntUnaryOperator)[andThen](IntUnaryOperator) -* int {java11-javadoc}/java.base/java/util/function/IntUnaryOperator.html#applyAsInt(int)[applyAsInt](int) -* IntUnaryOperator {java11-javadoc}/java.base/java/util/function/IntUnaryOperator.html#compose(java.util.function.IntUnaryOperator)[compose](IntUnaryOperator) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-LongBinaryOperator]] -==== LongBinaryOperator -* long {java11-javadoc}/java.base/java/util/function/LongBinaryOperator.html#applyAsLong(long,long)[applyAsLong](long, long) -* boolean 
{java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-LongConsumer]] -==== LongConsumer -* void {java11-javadoc}/java.base/java/util/function/LongConsumer.html#accept(long)[accept](long) -* LongConsumer {java11-javadoc}/java.base/java/util/function/LongConsumer.html#andThen(java.util.function.LongConsumer)[andThen](LongConsumer) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-LongFunction]] -==== LongFunction -* def {java11-javadoc}/java.base/java/util/function/LongFunction.html#apply(long)[apply](long) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-LongPredicate]] -==== LongPredicate -* LongPredicate {java11-javadoc}/java.base/java/util/function/LongPredicate.html#and(java.util.function.LongPredicate)[and](LongPredicate) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* LongPredicate {java11-javadoc}/java.base/java/util/function/LongPredicate.html#negate()[negate]() -* LongPredicate {java11-javadoc}/java.base/java/util/function/LongPredicate.html#or(java.util.function.LongPredicate)[or](LongPredicate) -* boolean {java11-javadoc}/java.base/java/util/function/LongPredicate.html#test(long)[test](long) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-LongSupplier]] -==== LongSupplier -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long {java11-javadoc}/java.base/java/util/function/LongSupplier.html#getAsLong()[getAsLong]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-LongToDoubleFunction]] -==== LongToDoubleFunction -* double {java11-javadoc}/java.base/java/util/function/LongToDoubleFunction.html#applyAsDouble(long)[applyAsDouble](long) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-LongToIntFunction]] -==== LongToIntFunction -* int {java11-javadoc}/java.base/java/util/function/LongToIntFunction.html#applyAsInt(long)[applyAsInt](long) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-LongUnaryOperator]] -==== LongUnaryOperator -* static LongUnaryOperator 
{java11-javadoc}/java.base/java/util/function/LongUnaryOperator.html#identity()[identity]() -* LongUnaryOperator {java11-javadoc}/java.base/java/util/function/LongUnaryOperator.html#andThen(java.util.function.LongUnaryOperator)[andThen](LongUnaryOperator) -* long {java11-javadoc}/java.base/java/util/function/LongUnaryOperator.html#applyAsLong(long)[applyAsLong](long) -* LongUnaryOperator {java11-javadoc}/java.base/java/util/function/LongUnaryOperator.html#compose(java.util.function.LongUnaryOperator)[compose](LongUnaryOperator) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ObjDoubleConsumer]] -==== ObjDoubleConsumer -* void {java11-javadoc}/java.base/java/util/function/ObjDoubleConsumer.html#accept(java.lang.Object,double)[accept](def, double) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ObjIntConsumer]] -==== ObjIntConsumer -* void {java11-javadoc}/java.base/java/util/function/ObjIntConsumer.html#accept(java.lang.Object,int)[accept](def, int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ObjLongConsumer]] -==== ObjLongConsumer -* void {java11-javadoc}/java.base/java/util/function/ObjLongConsumer.html#accept(java.lang.Object,long)[accept](def, long) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Predicate]] -==== Predicate -* static Predicate {java11-javadoc}/java.base/java/util/function/Predicate.html#isEqual(java.lang.Object)[isEqual](def) -* Predicate {java11-javadoc}/java.base/java/util/function/Predicate.html#and(java.util.function.Predicate)[and](Predicate) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Predicate {java11-javadoc}/java.base/java/util/function/Predicate.html#negate()[negate]() -* Predicate {java11-javadoc}/java.base/java/util/function/Predicate.html#or(java.util.function.Predicate)[or](Predicate) -* boolean {java11-javadoc}/java.base/java/util/function/Predicate.html#test(java.lang.Object)[test](def) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Supplier]] -==== Supplier -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* def {java11-javadoc}/java.base/java/util/function/Supplier.html#get()[get]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ToDoubleBiFunction]] -==== 
ToDoubleBiFunction -* double {java11-javadoc}/java.base/java/util/function/ToDoubleBiFunction.html#applyAsDouble(java.lang.Object,java.lang.Object)[applyAsDouble](def, def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ToDoubleFunction]] -==== ToDoubleFunction -* double {java11-javadoc}/java.base/java/util/function/ToDoubleFunction.html#applyAsDouble(java.lang.Object)[applyAsDouble](def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ToIntBiFunction]] -==== ToIntBiFunction -* int {java11-javadoc}/java.base/java/util/function/ToIntBiFunction.html#applyAsInt(java.lang.Object,java.lang.Object)[applyAsInt](def, def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ToIntFunction]] -==== ToIntFunction -* int {java11-javadoc}/java.base/java/util/function/ToIntFunction.html#applyAsInt(java.lang.Object)[applyAsInt](def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ToLongBiFunction]] -==== ToLongBiFunction -* long {java11-javadoc}/java.base/java/util/function/ToLongBiFunction.html#applyAsLong(java.lang.Object,java.lang.Object)[applyAsLong](def, def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ToLongFunction]] -==== ToLongFunction -* long {java11-javadoc}/java.base/java/util/function/ToLongFunction.html#applyAsLong(java.lang.Object)[applyAsLong](def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-UnaryOperator]] -==== UnaryOperator -* static UnaryOperator {java11-javadoc}/java.base/java/util/function/UnaryOperator.html#identity()[identity]() -* Function {java11-javadoc}/java.base/java/util/function/Function.html#andThen(java.util.function.Function)[andThen](Function) -* def {java11-javadoc}/java.base/java/util/function/Function.html#apply(java.lang.Object)[apply](def) -* Function {java11-javadoc}/java.base/java/util/function/Function.html#compose(java.util.function.Function)[compose](Function) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null 
{java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-java-util-regex"] -=== Shared API for package java.util.regex -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-shared-Matcher]] -==== Matcher -* static null {java11-javadoc}/java.base/java/util/regex/Matcher.html#quoteReplacement(java.lang.String)[quoteReplacement](null) -* int {java11-javadoc}/java.base/java/util/regex/Matcher.html#end()[end]() -* int {java11-javadoc}/java.base/java/util/regex/Matcher.html#end(int)[end](int) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/util/regex/Matcher.html#find()[find]() -* boolean {java11-javadoc}/java.base/java/util/regex/Matcher.html#find(int)[find](int) -* null {java11-javadoc}/java.base/java/util/regex/Matcher.html#group()[group]() -* null {java11-javadoc}/java.base/java/util/regex/Matcher.html#group(int)[group](int) -* int {java11-javadoc}/java.base/java/util/regex/Matcher.html#groupCount()[groupCount]() -* boolean {java11-javadoc}/java.base/java/util/regex/Matcher.html#hasAnchoringBounds()[hasAnchoringBounds]() -* boolean {java11-javadoc}/java.base/java/util/regex/Matcher.html#hasTransparentBounds()[hasTransparentBounds]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/regex/Matcher.html#hitEnd()[hitEnd]() -* boolean {java11-javadoc}/java.base/java/util/regex/Matcher.html#lookingAt()[lookingAt]() -* boolean {java11-javadoc}/java.base/java/util/regex/Matcher.html#matches()[matches]() -* null namedGroup(null) -* Pattern {java11-javadoc}/java.base/java/util/regex/Matcher.html#pattern()[pattern]() -* Matcher {java11-javadoc}/java.base/java/util/regex/Matcher.html#region(int,int)[region](int, int) -* int {java11-javadoc}/java.base/java/util/regex/Matcher.html#regionEnd()[regionEnd]() -* int {java11-javadoc}/java.base/java/util/regex/Matcher.html#regionStart()[regionStart]() -* null {java11-javadoc}/java.base/java/util/regex/Matcher.html#replaceAll(java.lang.String)[replaceAll](null) -* null {java11-javadoc}/java.base/java/util/regex/Matcher.html#replaceFirst(java.lang.String)[replaceFirst](null) -* boolean {java11-javadoc}/java.base/java/util/regex/Matcher.html#requireEnd()[requireEnd]() -* Matcher {java11-javadoc}/java.base/java/util/regex/Matcher.html#reset()[reset]() -* int {java11-javadoc}/java.base/java/util/regex/Matcher.html#start()[start]() -* int {java11-javadoc}/java.base/java/util/regex/Matcher.html#start(int)[start](int) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* Matcher {java11-javadoc}/java.base/java/util/regex/Matcher.html#useAnchoringBounds(boolean)[useAnchoringBounds](boolean) -* Matcher {java11-javadoc}/java.base/java/util/regex/Matcher.html#usePattern(java.util.regex.Pattern)[usePattern](Pattern) -* Matcher {java11-javadoc}/java.base/java/util/regex/Matcher.html#useTransparentBounds(boolean)[useTransparentBounds](boolean) - - -[[painless-api-reference-shared-Pattern]] -==== Pattern -* static null {java11-javadoc}/java.base/java/util/regex/Pattern.html#quote(java.lang.String)[quote](null) -* Predicate {java11-javadoc}/java.base/java/util/regex/Pattern.html#asPredicate()[asPredicate]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int 
{java11-javadoc}/java.base/java/util/regex/Pattern.html#flags()[flags]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Matcher {java11-javadoc}/java.base/java/util/regex/Pattern.html#matcher(java.lang.CharSequence)[matcher](CharSequence) -* null {java11-javadoc}/java.base/java/util/regex/Pattern.html#pattern()[pattern]() -* null[] {java11-javadoc}/java.base/java/util/regex/Pattern.html#split(java.lang.CharSequence)[split](CharSequence) -* null[] {java11-javadoc}/java.base/java/util/regex/Pattern.html#split(java.lang.CharSequence,int)[split](CharSequence, int) -* Stream {java11-javadoc}/java.base/java/util/regex/Pattern.html#splitAsStream(java.lang.CharSequence)[splitAsStream](CharSequence) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-java-util-stream"] -=== Shared API for package java.util.stream -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-shared-BaseStream]] -==== BaseStream -* void {java11-javadoc}/java.base/java/util/stream/BaseStream.html#close()[close]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/stream/BaseStream.html#isParallel()[isParallel]() -* Iterator {java11-javadoc}/java.base/java/util/stream/BaseStream.html#iterator()[iterator]() -* BaseStream {java11-javadoc}/java.base/java/util/stream/BaseStream.html#sequential()[sequential]() -* Spliterator {java11-javadoc}/java.base/java/util/stream/BaseStream.html#spliterator()[spliterator]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* BaseStream {java11-javadoc}/java.base/java/util/stream/BaseStream.html#unordered()[unordered]() - - -[[painless-api-reference-shared-Collector]] -==== Collector -* static Collector {java11-javadoc}/java.base/java/util/stream/Collector.html#of(java.util.function.Supplier,java.util.function.BiConsumer,java.util.function.BinaryOperator,java.util.stream.Collector$Characteristics%5B%5D)[of](Supplier, BiConsumer, BinaryOperator, Collector.Characteristics[]) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collector.html#of(java.util.function.Supplier,java.util.function.BiConsumer,java.util.function.BinaryOperator,java.util.function.Function,java.util.stream.Collector$Characteristics%5B%5D)[of](Supplier, BiConsumer, BinaryOperator, Function, Collector.Characteristics[]) -* BiConsumer {java11-javadoc}/java.base/java/util/stream/Collector.html#accumulator()[accumulator]() -* Set {java11-javadoc}/java.base/java/util/stream/Collector.html#characteristics()[characteristics]() -* BinaryOperator {java11-javadoc}/java.base/java/util/stream/Collector.html#combiner()[combiner]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* Function {java11-javadoc}/java.base/java/util/stream/Collector.html#finisher()[finisher]() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* Supplier {java11-javadoc}/java.base/java/util/stream/Collector.html#supplier()[supplier]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Collector-Characteristics]] -==== Collector.Characteristics -* static Collector.Characteristics 
{java11-javadoc}/java.base/java/util/stream/Collector$Characteristics.html#CONCURRENT[CONCURRENT] -* static Collector.Characteristics {java11-javadoc}/java.base/java/util/stream/Collector$Characteristics.html#IDENTITY_FINISH[IDENTITY_FINISH] -* static Collector.Characteristics {java11-javadoc}/java.base/java/util/stream/Collector$Characteristics.html#UNORDERED[UNORDERED] -* static Collector.Characteristics {java11-javadoc}/java.base/java/util/stream/Collector$Characteristics.html#valueOf(java.lang.String)[valueOf](null) -* static Collector.Characteristics[] {java11-javadoc}/java.base/java/util/stream/Collector$Characteristics.html#values()[values]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#compareTo(java.lang.Enum)[compareTo](Enum) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Enum.html#name()[name]() -* int {java11-javadoc}/java.base/java/lang/Enum.html#ordinal()[ordinal]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Collectors]] -==== Collectors -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#averagingDouble(java.util.function.ToDoubleFunction)[averagingDouble](ToDoubleFunction) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#averagingInt(java.util.function.ToIntFunction)[averagingInt](ToIntFunction) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#averagingLong(java.util.function.ToLongFunction)[averagingLong](ToLongFunction) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#collectingAndThen(java.util.stream.Collector,java.util.function.Function)[collectingAndThen](Collector, Function) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#counting()[counting]() -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#groupingBy(java.util.function.Function)[groupingBy](Function) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#groupingBy(java.util.function.Function,java.util.stream.Collector)[groupingBy](Function, Collector) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#groupingBy(java.util.function.Function,java.util.function.Supplier,java.util.stream.Collector)[groupingBy](Function, Supplier, Collector) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#joining()[joining]() -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#joining(java.lang.CharSequence)[joining](CharSequence) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#joining(java.lang.CharSequence,java.lang.CharSequence,java.lang.CharSequence)[joining](CharSequence, CharSequence, CharSequence) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#mapping(java.util.function.Function,java.util.stream.Collector)[mapping](Function, Collector) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#maxBy(java.util.Comparator)[maxBy](Comparator) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#minBy(java.util.Comparator)[minBy](Comparator) -* static Collector 
{java11-javadoc}/java.base/java/util/stream/Collectors.html#partitioningBy(java.util.function.Predicate)[partitioningBy](Predicate) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#partitioningBy(java.util.function.Predicate,java.util.stream.Collector)[partitioningBy](Predicate, Collector) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#reducing(java.util.function.BinaryOperator)[reducing](BinaryOperator) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#reducing(java.lang.Object,java.util.function.BinaryOperator)[reducing](def, BinaryOperator) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#reducing(java.lang.Object,java.util.function.Function,java.util.function.BinaryOperator)[reducing](def, Function, BinaryOperator) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#summarizingDouble(java.util.function.ToDoubleFunction)[summarizingDouble](ToDoubleFunction) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#summarizingInt(java.util.function.ToIntFunction)[summarizingInt](ToIntFunction) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#summarizingLong(java.util.function.ToLongFunction)[summarizingLong](ToLongFunction) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#summingDouble(java.util.function.ToDoubleFunction)[summingDouble](ToDoubleFunction) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#summingInt(java.util.function.ToIntFunction)[summingInt](ToIntFunction) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#summingLong(java.util.function.ToLongFunction)[summingLong](ToLongFunction) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#toCollection(java.util.function.Supplier)[toCollection](Supplier) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#toList()[toList]() -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#toMap(java.util.function.Function,java.util.function.Function)[toMap](Function, Function) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#toMap(java.util.function.Function,java.util.function.Function,java.util.function.BinaryOperator)[toMap](Function, Function, BinaryOperator) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#toMap(java.util.function.Function,java.util.function.Function,java.util.function.BinaryOperator,java.util.function.Supplier)[toMap](Function, Function, BinaryOperator, Supplier) -* static Collector {java11-javadoc}/java.base/java/util/stream/Collectors.html#toSet()[toSet]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-DoubleStream]] -==== DoubleStream -* static DoubleStream.Builder {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#builder()[builder]() -* static DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#concat(java.util.stream.DoubleStream,java.util.stream.DoubleStream)[concat](DoubleStream, DoubleStream) -* static DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#empty()[empty]() -* 
static DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#of(double%5B%5D)[of](double[]) -* boolean {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#allMatch(java.util.function.DoublePredicate)[allMatch](DoublePredicate) -* boolean {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#anyMatch(java.util.function.DoublePredicate)[anyMatch](DoublePredicate) -* OptionalDouble {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#average()[average]() -* Stream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#boxed()[boxed]() -* void {java11-javadoc}/java.base/java/util/stream/BaseStream.html#close()[close]() -* def {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#collect(java.util.function.Supplier,java.util.function.ObjDoubleConsumer,java.util.function.BiConsumer)[collect](Supplier, ObjDoubleConsumer, BiConsumer) -* long {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#count()[count]() -* DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#distinct()[distinct]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#filter(java.util.function.DoublePredicate)[filter](DoublePredicate) -* OptionalDouble {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#findAny()[findAny]() -* OptionalDouble {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#findFirst()[findFirst]() -* DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#flatMap(java.util.function.DoubleFunction)[flatMap](DoubleFunction) -* void {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#forEach(java.util.function.DoubleConsumer)[forEach](DoubleConsumer) -* void {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#forEachOrdered(java.util.function.DoubleConsumer)[forEachOrdered](DoubleConsumer) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/stream/BaseStream.html#isParallel()[isParallel]() -* PrimitiveIterator.OfDouble {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#iterator()[iterator]() -* DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#limit(long)[limit](long) -* DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#map(java.util.function.DoubleUnaryOperator)[map](DoubleUnaryOperator) -* IntStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#mapToInt(java.util.function.DoubleToIntFunction)[mapToInt](DoubleToIntFunction) -* LongStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#mapToLong(java.util.function.DoubleToLongFunction)[mapToLong](DoubleToLongFunction) -* Stream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#mapToObj(java.util.function.DoubleFunction)[mapToObj](DoubleFunction) -* OptionalDouble {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#max()[max]() -* OptionalDouble {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#min()[min]() -* boolean {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#noneMatch(java.util.function.DoublePredicate)[noneMatch](DoublePredicate) -* DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#peek(java.util.function.DoubleConsumer)[peek](DoubleConsumer) -* OptionalDouble 
{java11-javadoc}/java.base/java/util/stream/DoubleStream.html#reduce(java.util.function.DoubleBinaryOperator)[reduce](DoubleBinaryOperator) -* double {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#reduce(double,java.util.function.DoubleBinaryOperator)[reduce](double, DoubleBinaryOperator) -* DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#sequential()[sequential]() -* DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#skip(long)[skip](long) -* DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#sorted()[sorted]() -* Spliterator.OfDouble {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#spliterator()[spliterator]() -* double {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#sum()[sum]() -* DoubleSummaryStatistics {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#summaryStatistics()[summaryStatistics]() -* double[] {java11-javadoc}/java.base/java/util/stream/DoubleStream.html#toArray()[toArray]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* BaseStream {java11-javadoc}/java.base/java/util/stream/BaseStream.html#unordered()[unordered]() - - -[[painless-api-reference-shared-DoubleStream-Builder]] -==== DoubleStream.Builder -* void {java11-javadoc}/java.base/java/util/function/DoubleConsumer.html#accept(double)[accept](double) -* DoubleStream.Builder {java11-javadoc}/java.base/java/util/stream/DoubleStream$Builder.html#add(double)[add](double) -* DoubleConsumer {java11-javadoc}/java.base/java/util/function/DoubleConsumer.html#andThen(java.util.function.DoubleConsumer)[andThen](DoubleConsumer) -* DoubleStream {java11-javadoc}/java.base/java/util/stream/DoubleStream$Builder.html#build()[build]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-IntStream]] -==== IntStream -* static IntStream.Builder {java11-javadoc}/java.base/java/util/stream/IntStream.html#builder()[builder]() -* static IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#concat(java.util.stream.IntStream,java.util.stream.IntStream)[concat](IntStream, IntStream) -* static IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#empty()[empty]() -* static IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#of(int%5B%5D)[of](int[]) -* static IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#range(int,int)[range](int, int) -* static IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#rangeClosed(int,int)[rangeClosed](int, int) -* boolean {java11-javadoc}/java.base/java/util/stream/IntStream.html#allMatch(java.util.function.IntPredicate)[allMatch](IntPredicate) -* boolean {java11-javadoc}/java.base/java/util/stream/IntStream.html#anyMatch(java.util.function.IntPredicate)[anyMatch](IntPredicate) -* DoubleStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#asDoubleStream()[asDoubleStream]() -* LongStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#asLongStream()[asLongStream]() -* OptionalDouble {java11-javadoc}/java.base/java/util/stream/IntStream.html#average()[average]() -* Stream {java11-javadoc}/java.base/java/util/stream/IntStream.html#boxed()[boxed]() -* void 
{java11-javadoc}/java.base/java/util/stream/BaseStream.html#close()[close]() -* def {java11-javadoc}/java.base/java/util/stream/IntStream.html#collect(java.util.function.Supplier,java.util.function.ObjIntConsumer,java.util.function.BiConsumer)[collect](Supplier, ObjIntConsumer, BiConsumer) -* long {java11-javadoc}/java.base/java/util/stream/IntStream.html#count()[count]() -* IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#distinct()[distinct]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#filter(java.util.function.IntPredicate)[filter](IntPredicate) -* OptionalInt {java11-javadoc}/java.base/java/util/stream/IntStream.html#findAny()[findAny]() -* OptionalInt {java11-javadoc}/java.base/java/util/stream/IntStream.html#findFirst()[findFirst]() -* IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#flatMap(java.util.function.IntFunction)[flatMap](IntFunction) -* void {java11-javadoc}/java.base/java/util/stream/IntStream.html#forEach(java.util.function.IntConsumer)[forEach](IntConsumer) -* void {java11-javadoc}/java.base/java/util/stream/IntStream.html#forEachOrdered(java.util.function.IntConsumer)[forEachOrdered](IntConsumer) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/stream/BaseStream.html#isParallel()[isParallel]() -* PrimitiveIterator.OfInt {java11-javadoc}/java.base/java/util/stream/IntStream.html#iterator()[iterator]() -* IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#limit(long)[limit](long) -* IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#map(java.util.function.IntUnaryOperator)[map](IntUnaryOperator) -* DoubleStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#mapToDouble(java.util.function.IntToDoubleFunction)[mapToDouble](IntToDoubleFunction) -* LongStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#mapToLong(java.util.function.IntToLongFunction)[mapToLong](IntToLongFunction) -* Stream {java11-javadoc}/java.base/java/util/stream/IntStream.html#mapToObj(java.util.function.IntFunction)[mapToObj](IntFunction) -* OptionalInt {java11-javadoc}/java.base/java/util/stream/IntStream.html#max()[max]() -* OptionalInt {java11-javadoc}/java.base/java/util/stream/IntStream.html#min()[min]() -* boolean {java11-javadoc}/java.base/java/util/stream/IntStream.html#noneMatch(java.util.function.IntPredicate)[noneMatch](IntPredicate) -* IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#peek(java.util.function.IntConsumer)[peek](IntConsumer) -* OptionalInt {java11-javadoc}/java.base/java/util/stream/IntStream.html#reduce(java.util.function.IntBinaryOperator)[reduce](IntBinaryOperator) -* int {java11-javadoc}/java.base/java/util/stream/IntStream.html#reduce(int,java.util.function.IntBinaryOperator)[reduce](int, IntBinaryOperator) -* IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#sequential()[sequential]() -* IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#skip(long)[skip](long) -* IntStream {java11-javadoc}/java.base/java/util/stream/IntStream.html#sorted()[sorted]() -* Spliterator.OfInt {java11-javadoc}/java.base/java/util/stream/IntStream.html#spliterator()[spliterator]() -* int {java11-javadoc}/java.base/java/util/stream/IntStream.html#sum()[sum]() -* IntSummaryStatistics 
{java11-javadoc}/java.base/java/util/stream/IntStream.html#summaryStatistics()[summaryStatistics]() -* int[] {java11-javadoc}/java.base/java/util/stream/IntStream.html#toArray()[toArray]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* BaseStream {java11-javadoc}/java.base/java/util/stream/BaseStream.html#unordered()[unordered]() - - -[[painless-api-reference-shared-IntStream-Builder]] -==== IntStream.Builder -* void {java11-javadoc}/java.base/java/util/function/IntConsumer.html#accept(int)[accept](int) -* IntStream.Builder {java11-javadoc}/java.base/java/util/stream/IntStream$Builder.html#add(int)[add](int) -* IntConsumer {java11-javadoc}/java.base/java/util/function/IntConsumer.html#andThen(java.util.function.IntConsumer)[andThen](IntConsumer) -* IntStream {java11-javadoc}/java.base/java/util/stream/IntStream$Builder.html#build()[build]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-LongStream]] -==== LongStream -* static LongStream.Builder {java11-javadoc}/java.base/java/util/stream/LongStream.html#builder()[builder]() -* static LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#concat(java.util.stream.LongStream,java.util.stream.LongStream)[concat](LongStream, LongStream) -* static LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#empty()[empty]() -* static LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#of(long%5B%5D)[of](long[]) -* static LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#range(long,long)[range](long, long) -* static LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#rangeClosed(long,long)[rangeClosed](long, long) -* boolean {java11-javadoc}/java.base/java/util/stream/LongStream.html#allMatch(java.util.function.LongPredicate)[allMatch](LongPredicate) -* boolean {java11-javadoc}/java.base/java/util/stream/LongStream.html#anyMatch(java.util.function.LongPredicate)[anyMatch](LongPredicate) -* DoubleStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#asDoubleStream()[asDoubleStream]() -* OptionalDouble {java11-javadoc}/java.base/java/util/stream/LongStream.html#average()[average]() -* Stream {java11-javadoc}/java.base/java/util/stream/LongStream.html#boxed()[boxed]() -* void {java11-javadoc}/java.base/java/util/stream/BaseStream.html#close()[close]() -* def {java11-javadoc}/java.base/java/util/stream/LongStream.html#collect(java.util.function.Supplier,java.util.function.ObjLongConsumer,java.util.function.BiConsumer)[collect](Supplier, ObjLongConsumer, BiConsumer) -* long {java11-javadoc}/java.base/java/util/stream/LongStream.html#count()[count]() -* LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#distinct()[distinct]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#filter(java.util.function.LongPredicate)[filter](LongPredicate) -* OptionalLong {java11-javadoc}/java.base/java/util/stream/LongStream.html#findAny()[findAny]() -* OptionalLong {java11-javadoc}/java.base/java/util/stream/LongStream.html#findFirst()[findFirst]() -* LongStream 
{java11-javadoc}/java.base/java/util/stream/LongStream.html#flatMap(java.util.function.LongFunction)[flatMap](LongFunction) -* void {java11-javadoc}/java.base/java/util/stream/LongStream.html#forEach(java.util.function.LongConsumer)[forEach](LongConsumer) -* void {java11-javadoc}/java.base/java/util/stream/LongStream.html#forEachOrdered(java.util.function.LongConsumer)[forEachOrdered](LongConsumer) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/stream/BaseStream.html#isParallel()[isParallel]() -* PrimitiveIterator.OfLong {java11-javadoc}/java.base/java/util/stream/LongStream.html#iterator()[iterator]() -* LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#limit(long)[limit](long) -* LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#map(java.util.function.LongUnaryOperator)[map](LongUnaryOperator) -* DoubleStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#mapToDouble(java.util.function.LongToDoubleFunction)[mapToDouble](LongToDoubleFunction) -* IntStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#mapToInt(java.util.function.LongToIntFunction)[mapToInt](LongToIntFunction) -* Stream {java11-javadoc}/java.base/java/util/stream/LongStream.html#mapToObj(java.util.function.LongFunction)[mapToObj](LongFunction) -* OptionalLong {java11-javadoc}/java.base/java/util/stream/LongStream.html#max()[max]() -* OptionalLong {java11-javadoc}/java.base/java/util/stream/LongStream.html#min()[min]() -* boolean {java11-javadoc}/java.base/java/util/stream/LongStream.html#noneMatch(java.util.function.LongPredicate)[noneMatch](LongPredicate) -* LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#peek(java.util.function.LongConsumer)[peek](LongConsumer) -* OptionalLong {java11-javadoc}/java.base/java/util/stream/LongStream.html#reduce(java.util.function.LongBinaryOperator)[reduce](LongBinaryOperator) -* long {java11-javadoc}/java.base/java/util/stream/LongStream.html#reduce(long,java.util.function.LongBinaryOperator)[reduce](long, LongBinaryOperator) -* LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#sequential()[sequential]() -* LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#skip(long)[skip](long) -* LongStream {java11-javadoc}/java.base/java/util/stream/LongStream.html#sorted()[sorted]() -* Spliterator.OfLong {java11-javadoc}/java.base/java/util/stream/LongStream.html#spliterator()[spliterator]() -* long {java11-javadoc}/java.base/java/util/stream/LongStream.html#sum()[sum]() -* LongSummaryStatistics {java11-javadoc}/java.base/java/util/stream/LongStream.html#summaryStatistics()[summaryStatistics]() -* long[] {java11-javadoc}/java.base/java/util/stream/LongStream.html#toArray()[toArray]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* BaseStream {java11-javadoc}/java.base/java/util/stream/BaseStream.html#unordered()[unordered]() - - -[[painless-api-reference-shared-LongStream-Builder]] -==== LongStream.Builder -* void {java11-javadoc}/java.base/java/util/function/LongConsumer.html#accept(long)[accept](long) -* LongStream.Builder {java11-javadoc}/java.base/java/util/stream/LongStream$Builder.html#add(long)[add](long) -* LongConsumer {java11-javadoc}/java.base/java/util/function/LongConsumer.html#andThen(java.util.function.LongConsumer)[andThen](LongConsumer) -* LongStream {java11-javadoc}/java.base/java/util/stream/LongStream$Builder.html#build()[build]() -* 
boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-Stream]] -==== Stream -* static Stream.Builder {java11-javadoc}/java.base/java/util/stream/Stream.html#builder()[builder]() -* static Stream {java11-javadoc}/java.base/java/util/stream/Stream.html#concat(java.util.stream.Stream,java.util.stream.Stream)[concat](Stream, Stream) -* static Stream {java11-javadoc}/java.base/java/util/stream/Stream.html#empty()[empty]() -* static Stream {java11-javadoc}/java.base/java/util/stream/Stream.html#of(java.lang.Object%5B%5D)[of](def[]) -* boolean {java11-javadoc}/java.base/java/util/stream/Stream.html#allMatch(java.util.function.Predicate)[allMatch](Predicate) -* boolean {java11-javadoc}/java.base/java/util/stream/Stream.html#anyMatch(java.util.function.Predicate)[anyMatch](Predicate) -* void {java11-javadoc}/java.base/java/util/stream/BaseStream.html#close()[close]() -* def {java11-javadoc}/java.base/java/util/stream/Stream.html#collect(java.util.stream.Collector)[collect](Collector) -* def {java11-javadoc}/java.base/java/util/stream/Stream.html#collect(java.util.function.Supplier,java.util.function.BiConsumer,java.util.function.BiConsumer)[collect](Supplier, BiConsumer, BiConsumer) -* long {java11-javadoc}/java.base/java/util/stream/Stream.html#count()[count]() -* Stream {java11-javadoc}/java.base/java/util/stream/Stream.html#distinct()[distinct]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* Stream {java11-javadoc}/java.base/java/util/stream/Stream.html#filter(java.util.function.Predicate)[filter](Predicate) -* Optional {java11-javadoc}/java.base/java/util/stream/Stream.html#findAny()[findAny]() -* Optional {java11-javadoc}/java.base/java/util/stream/Stream.html#findFirst()[findFirst]() -* Stream {java11-javadoc}/java.base/java/util/stream/Stream.html#flatMap(java.util.function.Function)[flatMap](Function) -* DoubleStream {java11-javadoc}/java.base/java/util/stream/Stream.html#flatMapToDouble(java.util.function.Function)[flatMapToDouble](Function) -* IntStream {java11-javadoc}/java.base/java/util/stream/Stream.html#flatMapToInt(java.util.function.Function)[flatMapToInt](Function) -* LongStream {java11-javadoc}/java.base/java/util/stream/Stream.html#flatMapToLong(java.util.function.Function)[flatMapToLong](Function) -* void {java11-javadoc}/java.base/java/util/stream/Stream.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* void {java11-javadoc}/java.base/java/util/stream/Stream.html#forEachOrdered(java.util.function.Consumer)[forEachOrdered](Consumer) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/util/stream/BaseStream.html#isParallel()[isParallel]() -* Iterator {java11-javadoc}/java.base/java/util/stream/BaseStream.html#iterator()[iterator]() -* Stream {java11-javadoc}/java.base/java/util/stream/Stream.html#limit(long)[limit](long) -* Stream {java11-javadoc}/java.base/java/util/stream/Stream.html#map(java.util.function.Function)[map](Function) -* DoubleStream {java11-javadoc}/java.base/java/util/stream/Stream.html#mapToDouble(java.util.function.ToDoubleFunction)[mapToDouble](ToDoubleFunction) -* IntStream 
{java11-javadoc}/java.base/java/util/stream/Stream.html#mapToInt(java.util.function.ToIntFunction)[mapToInt](ToIntFunction) -* LongStream {java11-javadoc}/java.base/java/util/stream/Stream.html#mapToLong(java.util.function.ToLongFunction)[mapToLong](ToLongFunction) -* Optional {java11-javadoc}/java.base/java/util/stream/Stream.html#max(java.util.Comparator)[max](Comparator) -* Optional {java11-javadoc}/java.base/java/util/stream/Stream.html#min(java.util.Comparator)[min](Comparator) -* boolean {java11-javadoc}/java.base/java/util/stream/Stream.html#noneMatch(java.util.function.Predicate)[noneMatch](Predicate) -* Stream {java11-javadoc}/java.base/java/util/stream/Stream.html#peek(java.util.function.Consumer)[peek](Consumer) -* Optional {java11-javadoc}/java.base/java/util/stream/Stream.html#reduce(java.util.function.BinaryOperator)[reduce](BinaryOperator) -* def {java11-javadoc}/java.base/java/util/stream/Stream.html#reduce(java.lang.Object,java.util.function.BinaryOperator)[reduce](def, BinaryOperator) -* def {java11-javadoc}/java.base/java/util/stream/Stream.html#reduce(java.lang.Object,java.util.function.BiFunction,java.util.function.BinaryOperator)[reduce](def, BiFunction, BinaryOperator) -* BaseStream {java11-javadoc}/java.base/java/util/stream/BaseStream.html#sequential()[sequential]() -* Stream {java11-javadoc}/java.base/java/util/stream/Stream.html#skip(long)[skip](long) -* Stream {java11-javadoc}/java.base/java/util/stream/Stream.html#sorted()[sorted]() -* Stream {java11-javadoc}/java.base/java/util/stream/Stream.html#sorted(java.util.Comparator)[sorted](Comparator) -* Spliterator {java11-javadoc}/java.base/java/util/stream/BaseStream.html#spliterator()[spliterator]() -* def[] {java11-javadoc}/java.base/java/util/stream/Stream.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/stream/Stream.html#toArray(java.util.function.IntFunction)[toArray](IntFunction) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* BaseStream {java11-javadoc}/java.base/java/util/stream/BaseStream.html#unordered()[unordered]() - - -[[painless-api-reference-shared-Stream-Builder]] -==== Stream.Builder -* void {java11-javadoc}/java.base/java/util/function/Consumer.html#accept(java.lang.Object)[accept](def) -* Stream.Builder {java11-javadoc}/java.base/java/util/stream/Stream$Builder.html#add(java.lang.Object)[add](def) -* Consumer {java11-javadoc}/java.base/java/util/function/Consumer.html#andThen(java.util.function.Consumer)[andThen](Consumer) -* Stream {java11-javadoc}/java.base/java/util/stream/Stream$Builder.html#build()[build]() -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-org-apache-lucene-util"] -=== Shared API for package org.apache.lucene.util -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-shared-BytesRef]] -==== BytesRef -* byte[] bytes -* int length -* int offset -* boolean bytesEquals(BytesRef) -* int {java11-javadoc}/java.base/java/lang/Comparable.html#compareTo(java.lang.Object)[compareTo](def) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() -* null utf8ToString() - - -[role="exclude",id="painless-api-reference-shared-org-elasticsearch-common-geo"] -=== Shared API for package org.elasticsearch.common.geo -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-shared-GeoPoint]] -==== GeoPoint -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* double getLat() -* double getLon() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-org-elasticsearch-index-fielddata"] -=== Shared API for package org.elasticsearch.index.fielddata -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-shared-ScriptDocValues-Booleans]] -==== ScriptDocValues.Booleans -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Boolean get(int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* boolean getValue() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def 
{java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ScriptDocValues-BytesRefs]] -==== ScriptDocValues.BytesRefs -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* BytesRef get(int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* BytesRef getValue() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() 
-* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ScriptDocValues-Dates]] -==== ScriptDocValues.Dates -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* JodaCompatibleZonedDateTime get(int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* JodaCompatibleZonedDateTime getValue() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int 
{java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ScriptDocValues-Doubles]] -==== ScriptDocValues.Doubles -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Double get(int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* double getValue() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator 
{java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ScriptDocValues-GeoPoints]] -==== ScriptDocValues.GeoPoints -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* double arcDistance(double, double) -* double arcDistanceWithDefault(double, double, double) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* double geohashDistance(null) -* double geohashDistanceWithDefault(null, double) -* GeoPoint get(int) -* Object getByPath(null) -* Object getByPath(null, Object) -* double getLat() -* double[] getLats() -* int 
getLength() -* double getLon() -* double[] getLons() -* GeoPoint getValue() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* double planeDistance(double, double) -* double planeDistanceWithDefault(double, double, double) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ScriptDocValues-Longs]] -==== ScriptDocValues.Longs -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, 
Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* Long get(int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* long getValue() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ScriptDocValues-Strings]] -==== ScriptDocValues.Strings -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def 
find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* null get(int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* null getValue() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-org-elasticsearch-index-mapper"] -=== Shared API for package org.elasticsearch.index.mapper -See the <> for a high-level overview of all packages and classes. 
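For illustration, a minimal Painless sketch of reading the `ScriptDocValues` classes listed above, plus the IP doc-values class listed below, through the `doc` map; the field names `price`, `location`, and `client_ip` are hypothetical:

[source,painless]
----
// Hypothetical field names; assumes a context where the `doc` map is available.
double price = doc['price'].value;                    // ScriptDocValues.Doubles.getValue()
long positives = doc['price'].stream().filter(v -> v > 0).count(); // Collection.stream() from the listings above
double km = doc['location'].arcDistance(52.37, 4.89) / 1000;       // ScriptDocValues.GeoPoints, metres to km
String ip = doc['client_ip'].value;                   // IP doc values return the address as a String
return price + km + positives + (ip != null ? 1 : 0);
----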
- -[[painless-api-reference-shared-IpFieldMapper-IpFieldType-IpScriptDocValues]] -==== IpFieldMapper.IpFieldType.IpScriptDocValues -* boolean {java11-javadoc}/java.base/java/util/Collection.html#add(java.lang.Object)[add](def) -* void {java11-javadoc}/java.base/java/util/List.html#add(int,java.lang.Object)[add](int, def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#addAll(java.util.Collection)[addAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/List.html#addAll(int,java.util.Collection)[addAll](int, Collection) -* boolean any(Predicate) -* Collection asCollection() -* List asList() -* void {java11-javadoc}/java.base/java/util/Collection.html#clear()[clear]() -* List collect(Function) -* def collect(Collection, Function) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#contains(java.lang.Object)[contains](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#containsAll(java.util.Collection)[containsAll](Collection) -* def each(Consumer) -* def eachWithIndex(ObjIntConsumer) -* boolean {java11-javadoc}/java.base/java/util/List.html#equals(java.lang.Object)[equals](Object) -* boolean every(Predicate) -* def find(Predicate) -* List findAll(Predicate) -* def findResult(Function) -* def findResult(def, Function) -* List findResults(Function) -* void {java11-javadoc}/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)[forEach](Consumer) -* null get(int) -* Object getByPath(null) -* Object getByPath(null, Object) -* int getLength() -* null getValue() -* Map groupBy(Function) -* int {java11-javadoc}/java.base/java/util/List.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/util/List.html#indexOf(java.lang.Object)[indexOf](def) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#isEmpty()[isEmpty]() -* Iterator {java11-javadoc}/java.base/java/lang/Iterable.html#iterator()[iterator]() -* null join(null) -* int {java11-javadoc}/java.base/java/util/List.html#lastIndexOf(java.lang.Object)[lastIndexOf](def) -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator()[listIterator]() -* ListIterator {java11-javadoc}/java.base/java/util/List.html#listIterator(int)[listIterator](int) -* def {java11-javadoc}/java.base/java/util/List.html#remove(int)[remove](int) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeAll(java.util.Collection)[removeAll](Collection) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#removeIf(java.util.function.Predicate)[removeIf](Predicate) -* void {java11-javadoc}/java.base/java/util/List.html#replaceAll(java.util.function.UnaryOperator)[replaceAll](UnaryOperator) -* boolean {java11-javadoc}/java.base/java/util/Collection.html#retainAll(java.util.Collection)[retainAll](Collection) -* def {java11-javadoc}/java.base/java/util/List.html#set(int,java.lang.Object)[set](int, def) -* int {java11-javadoc}/java.base/java/util/Collection.html#size()[size]() -* void {java11-javadoc}/java.base/java/util/List.html#sort(java.util.Comparator)[sort](Comparator) -* List split(Predicate) -* Spliterator {java11-javadoc}/java.base/java/util/Collection.html#spliterator()[spliterator]() -* Stream {java11-javadoc}/java.base/java/util/Collection.html#stream()[stream]() -* List {java11-javadoc}/java.base/java/util/List.html#subList(int,int)[subList](int, int) -* double sum() -* double sum(ToDoubleFunction) -* def[] {java11-javadoc}/java.base/java/util/Collection.html#toArray()[toArray]() -* def[] 
{java11-javadoc}/java.base/java/util/Collection.html#toArray(java.lang.Object%5B%5D)[toArray](def[]) -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-org-elasticsearch-index-query"] -=== Shared API for package org.elasticsearch.index.query -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-shared-IntervalFilterScript-Interval]] -==== IntervalFilterScript.Interval -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int getEnd() -* int getGaps() -* int getStart() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-org-elasticsearch-index-similarity"] -=== Shared API for package org.elasticsearch.index.similarity -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-shared-ScriptedSimilarity-Doc]] -==== ScriptedSimilarity.Doc -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* float getFreq() -* int getLength() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ScriptedSimilarity-Field]] -==== ScriptedSimilarity.Field -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long getDocCount() -* long getSumDocFreq() -* long getSumTotalTermFreq() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ScriptedSimilarity-Query]] -==== ScriptedSimilarity.Query -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* float getBoost() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-shared-ScriptedSimilarity-Term]] -==== ScriptedSimilarity.Term -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* long getDocFreq() -* long getTotalTermFreq() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-org-elasticsearch-painless-api"] -=== Shared API for package org.elasticsearch.painless.api -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-shared-Debug]] -==== Debug -* static void explain(Object) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-shared-org-elasticsearch-script"] -=== Shared API for package org.elasticsearch.script -See the <> for a high-level overview of all packages and classes. 
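For illustration, a minimal Painless sketch of the `JodaCompatibleZonedDateTime` class listed below, which is the type returned by `ScriptDocValues.Dates.getValue()` in the listing above; the field name `timestamp` is hypothetical:

[source,painless]
----
// Hypothetical field name; assumes a date field with doc values.
def dt = doc['timestamp'].value;        // a JodaCompatibleZonedDateTime (see the Dates listing above)
long millis = dt.getMillis();           // Joda-style accessor kept for compatibility
int hour = dt.getHour();                // java.time-style accessor
def sameTimeNextWeek = dt.plusWeeks(1); // arithmetic methods return a plain ZonedDateTime
return hour >= 9 && hour < 17 ? millis : sameTimeNextWeek.toEpochSecond();
----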
- -[[painless-api-reference-shared-JodaCompatibleZonedDateTime]] -==== JodaCompatibleZonedDateTime -* int {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#compareTo(java.time.chrono.ChronoZonedDateTime)[compareTo](ChronoZonedDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#equals(java.lang.Object)[equals](Object) -* null {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#format(java.time.format.DateTimeFormatter)[format](DateTimeFormatter) -* int {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#get(java.time.temporal.TemporalField)[get](TemporalField) -* int getCenturyOfEra() -* Chronology {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#getChronology()[getChronology]() -* int getDayOfMonth() -* int getDayOfWeek() -* DayOfWeek getDayOfWeekEnum() -* int getDayOfYear() -* int getEra() -* int getHour() -* int getHourOfDay() -* long {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#getLong(java.time.temporal.TemporalField)[getLong](TemporalField) -* long getMillis() -* int getMillisOfDay() -* int getMillisOfSecond() -* int getMinute() -* int getMinuteOfDay() -* int getMinuteOfHour() -* Month getMonth() -* int getMonthOfYear() -* int getMonthValue() -* int getNano() -* ZoneOffset {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#getOffset()[getOffset]() -* int getSecond() -* int getSecondOfDay() -* int getSecondOfMinute() -* int getWeekOfWeekyear() -* int getWeekyear() -* int getYear() -* int getYearOfCentury() -* int getYearOfEra() -* ZoneId {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#getZone()[getZone]() -* int {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#hashCode()[hashCode]() -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#isAfter(java.time.chrono.ChronoZonedDateTime)[isAfter](ChronoZonedDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#isBefore(java.time.chrono.ChronoZonedDateTime)[isBefore](ChronoZonedDateTime) -* boolean {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#isEqual(java.time.chrono.ChronoZonedDateTime)[isEqual](ChronoZonedDateTime) -* boolean {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#isSupported(java.time.temporal.TemporalField)[isSupported](TemporalField) -* ZonedDateTime minus(TemporalAmount) -* ZonedDateTime minus(long, TemporalUnit) -* ZonedDateTime minusDays(long) -* ZonedDateTime minusHours(long) -* ZonedDateTime minusMinutes(long) -* ZonedDateTime minusMonths(long) -* ZonedDateTime minusNanos(long) -* ZonedDateTime minusSeconds(long) -* ZonedDateTime minusWeeks(long) -* ZonedDateTime minusYears(long) -* ZonedDateTime plus(TemporalAmount) -* ZonedDateTime plus(long, TemporalUnit) -* ZonedDateTime plusDays(long) -* ZonedDateTime plusHours(long) -* ZonedDateTime plusMinutes(long) -* ZonedDateTime plusMonths(long) -* ZonedDateTime plusNanos(long) -* ZonedDateTime plusSeconds(long) -* ZonedDateTime plusWeeks(long) -* ZonedDateTime plusYears(long) -* def {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#query(java.time.temporal.TemporalQuery)[query](TemporalQuery) -* ValueRange {java11-javadoc}/java.base/java/time/temporal/TemporalAccessor.html#range(java.time.temporal.TemporalField)[range](TemporalField) -* long {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toEpochSecond()[toEpochSecond]() -* Instant 
{java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toInstant()[toInstant]() -* LocalDate toLocalDate() -* LocalDateTime toLocalDateTime() -* LocalTime {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toLocalTime()[toLocalTime]() -* OffsetDateTime toOffsetDateTime() -* null {java11-javadoc}/java.base/java/time/chrono/ChronoZonedDateTime.html#toString()[toString]() -* null toString(null) -* null toString(null, Locale) -* ZonedDateTime truncatedTo(TemporalUnit) -* long {java11-javadoc}/java.base/java/time/temporal/Temporal.html#until(java.time.temporal.Temporal,java.time.temporal.TemporalUnit)[until](Temporal, TemporalUnit) -* ZonedDateTime with(TemporalAdjuster) -* ZonedDateTime with(TemporalField, long) -* ZonedDateTime withDayOfMonth(int) -* ZonedDateTime withDayOfYear(int) -* ZonedDateTime withEarlierOffsetAtOverlap() -* ZonedDateTime withFixedOffsetZone() -* ZonedDateTime withHour(int) -* ZonedDateTime withLaterOffsetAtOverlap() -* ZonedDateTime withMinute(int) -* ZonedDateTime withMonth(int) -* ZonedDateTime withNano(int) -* ZonedDateTime withSecond(int) -* ZonedDateTime withYear(int) -* ZonedDateTime withZoneSameInstant(ZoneId) -* ZonedDateTime withZoneSameLocal(ZoneId) - - -[role="exclude",id="painless-api-reference-shared-org-elasticsearch-search-lookup"] -=== Shared API for package org.elasticsearch.search.lookup -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-shared-FieldLookup]] -==== FieldLookup -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* def getValue() -* List getValues() -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* boolean isEmpty() -* null {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-similarity-weight/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-similarity-weight/index.asciidoc deleted file mode 100644 index 8d9813dd36372..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-similarity-weight/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-similarity-weight]] -=== Similarity Weight API - -The following specialized API is available in the Similarity Weight context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-similarity-weight/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-similarity-weight/packages.asciidoc deleted file mode 100644 index a0510fb5f7821..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-similarity-weight/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-similarity-weight-java-lang"] -=== Similarity Weight API for package java.lang -See the <> for a high-level overview of all packages and classes. 
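For illustration, a minimal Painless sketch of the Painless-specific `String` additions in the listing below (`encodeBase64`, `decodeBase64`, and `splitOnToken`); the input value is hypothetical and the snippet is not tied to any particular script context:

[source,painless]
----
// Hypothetical input; only methods from the String listing below are used.
String raw = 'user:alice';
String encoded = raw.encodeBase64();         // Painless-specific helper
String decoded = encoded.decodeBase64();     // round-trips back to the original
String[] parts = decoded.splitOnToken(':');  // Painless-specific, non-regex split
return parts.length == 2 ? parts[1].toUpperCase() : decoded.trim();
----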
- -[[painless-api-reference-similarity-weight-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-similarity/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-similarity/index.asciidoc deleted file mode 100644 index f8d65739b637e..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-similarity/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-similarity]] -=== Similarity API - -The following specialized API is available in the Similarity context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-similarity/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-similarity/packages.asciidoc deleted file mode 100644 index 6bd05a8f495ac..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-similarity/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-similarity-java-lang"] -=== Similarity API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-similarity-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-string-sort/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-string-sort/index.asciidoc deleted file mode 100644 index bf6121a8d9915..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-string-sort/index.asciidoc +++ /dev/null @@ -1,31 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-string-sort]] -=== String Sort API - -The following specialized API is available in the String Sort context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -==== org.elasticsearch.xpack.sql.expression.literal.geo -<> - -* <> - -==== org.elasticsearch.xpack.sql.expression.literal.interval -<> - -* <> -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-string-sort/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-string-sort/packages.asciidoc deleted file mode 100644 index af4f941bedd96..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-string-sort/packages.asciidoc +++ /dev/null @@ -1,91 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-string-sort-java-lang"] -=== String Sort API for package java.lang -See the <> for a high-level overview of all packages and classes. 
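For illustration, a minimal sketch of a string-valued sort key built only from the `String` methods in the listing below; the field name `product_name` and the availability of the `doc` map in this context are assumptions:

[source,painless]
----
// Hypothetical field name; assumes a single-valued keyword field with doc values.
String name = doc['product_name'].value;
String key = name.trim().toLowerCase();   // normalize before sorting
// Keep the key short so the sort stays cheap; 8 is an arbitrary cut-off.
return key.length() > 8 ? key.substring(0, 8) : key;
----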
- -[[painless-api-reference-string-sort-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - -[role="exclude",id="painless-api-reference-string-sort-org-elasticsearch-xpack-sql-expression-literal-geo"] -=== String Sort API for package org.elasticsearch.xpack.sql.expression.literal.geo -See the <> for a high-level overview of all packages and classes. - -[[painless-api-reference-string-sort-GeoShape]] -==== GeoShape -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[role="exclude",id="painless-api-reference-string-sort-org-elasticsearch-xpack-sql-expression-literal-interval"] -=== String Sort API for package org.elasticsearch.xpack.sql.expression.literal.interval -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-string-sort-IntervalDayTime]] -==== IntervalDayTime -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - -[[painless-api-reference-string-sort-IntervalYearMonth]] -==== IntervalYearMonth -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* String {java11-javadoc}/java.base/java/lang/Object.html#toString()[toString]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-template/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-template/index.asciidoc deleted file mode 100644 index b53ce4bfea8db..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-template/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-template]] -=== Template API - -The following specialized API is available in the Template context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-template/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-template/packages.asciidoc deleted file mode 100644 index 4581bab845068..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-template/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-template-java-lang"] -=== Template API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-template-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-terms-set/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-terms-set/index.asciidoc deleted file mode 100644 index 3ed5661d91074..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-terms-set/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-terms-set]] -=== Terms Set API - -The following specialized API is available in the Terms Set context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-terms-set/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-terms-set/packages.asciidoc deleted file mode 100644 index 65c8db7af8c16..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-terms-set/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-terms-set-java-lang"] -=== Terms Set API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-terms-set-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-update/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-update/index.asciidoc deleted file mode 100644 index 139ab5d4984f3..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-update/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-update]] -=== Update API - -The following specialized API is available in the Update context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-update/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-update/packages.asciidoc deleted file mode 100644 index fa4874dc26294..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-update/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-update-java-lang"] -=== Update API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-update-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-watcher-condition/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-watcher-condition/index.asciidoc deleted file mode 100644 index ab62b04bb9d5d..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-watcher-condition/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-watcher-condition]] -=== Watcher Condition API - -The following specialized API is available in the Watcher Condition context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-watcher-condition/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-watcher-condition/packages.asciidoc deleted file mode 100644 index 91df00b419f42..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-watcher-condition/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-watcher-condition-java-lang"] -=== Watcher Condition API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-watcher-condition-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-watcher-transform/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-watcher-transform/index.asciidoc deleted file mode 100644 index 35610ce08245c..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-watcher-transform/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-watcher-transform]] -=== Watcher Transform API - -The following specialized API is available in the Watcher Transform context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-watcher-transform/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-watcher-transform/packages.asciidoc deleted file mode 100644 index a220172510b49..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-watcher-transform/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-watcher-transform-java-lang"] -=== Watcher Transform API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-watcher-transform-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-api-reference/painless-api-reference-xpack-template/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-xpack-template/index.asciidoc deleted file mode 100644 index 47035e9bc17a4..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-xpack-template/index.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -// This file is auto-generated. Do not edit. - -[[painless-api-reference-xpack-template]] -=== Xpack Template API - -The following specialized API is available in the Xpack Template context. - -* See the <> for further API available in all contexts. - -==== Classes By Package -The following classes are available grouped by their respective packages. Click on a class to view details about the available methods and fields. - - -==== java.lang -<> - -* <> - -include::packages.asciidoc[] - diff --git a/docs/painless/painless-api-reference/painless-api-reference-xpack-template/packages.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-xpack-template/packages.asciidoc deleted file mode 100644 index ab5d3f78498ba..0000000000000 --- a/docs/painless/painless-api-reference/painless-api-reference-xpack-template/packages.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -// This file is auto-generated. Do not edit. - - -[role="exclude",id="painless-api-reference-xpack-template-java-lang"] -=== Xpack Template API for package java.lang -See the <> for a high-level overview of all packages and classes. 
- -[[painless-api-reference-xpack-template-String]] -==== String -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D)[copyValueOf](char[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#copyValueOf(char%5B%5D,int,int)[copyValueOf](char[], int, int) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.lang.String,java.lang.Object%5B%5D)[format](String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#format(java.util.Locale,java.lang.String,java.lang.Object%5B%5D)[format](Locale, String, def[]) -* static String {java11-javadoc}/java.base/java/lang/String.html#join(java.lang.CharSequence,java.lang.Iterable)[join](CharSequence, Iterable) -* static String {java11-javadoc}/java.base/java/lang/String.html#valueOf(java.lang.Object)[valueOf](def) -* {java11-javadoc}/java.base/java/lang/String.html#()[String]() -* char {java11-javadoc}/java.base/java/lang/CharSequence.html#charAt(int)[charAt](int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#chars()[chars]() -* int {java11-javadoc}/java.base/java/lang/String.html#codePointAt(int)[codePointAt](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointBefore(int)[codePointBefore](int) -* int {java11-javadoc}/java.base/java/lang/String.html#codePointCount(int,int)[codePointCount](int, int) -* IntStream {java11-javadoc}/java.base/java/lang/CharSequence.html#codePoints()[codePoints]() -* int {java11-javadoc}/java.base/java/lang/String.html#compareTo(java.lang.String)[compareTo](String) -* int {java11-javadoc}/java.base/java/lang/String.html#compareToIgnoreCase(java.lang.String)[compareToIgnoreCase](String) -* String {java11-javadoc}/java.base/java/lang/String.html#concat(java.lang.String)[concat](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contains(java.lang.CharSequence)[contains](CharSequence) -* boolean {java11-javadoc}/java.base/java/lang/String.html#contentEquals(java.lang.CharSequence)[contentEquals](CharSequence) -* String decodeBase64() -* String encodeBase64() -* boolean {java11-javadoc}/java.base/java/lang/String.html#endsWith(java.lang.String)[endsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/Object.html#equals(java.lang.Object)[equals](Object) -* boolean {java11-javadoc}/java.base/java/lang/String.html#equalsIgnoreCase(java.lang.String)[equalsIgnoreCase](String) -* void {java11-javadoc}/java.base/java/lang/String.html#getChars(int,int,char%5B%5D,int)[getChars](int, int, char[], int) -* int {java11-javadoc}/java.base/java/lang/Object.html#hashCode()[hashCode]() -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String)[indexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#indexOf(java.lang.String,int)[indexOf](String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#isEmpty()[isEmpty]() -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String)[lastIndexOf](String) -* int {java11-javadoc}/java.base/java/lang/String.html#lastIndexOf(java.lang.String,int)[lastIndexOf](String, int) -* int {java11-javadoc}/java.base/java/lang/CharSequence.html#length()[length]() -* int {java11-javadoc}/java.base/java/lang/String.html#offsetByCodePoints(int,int)[offsetByCodePoints](int, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#regionMatches(int,java.lang.String,int,int)[regionMatches](int, String, int, int) -* boolean 
{java11-javadoc}/java.base/java/lang/String.html#regionMatches(boolean,int,java.lang.String,int,int)[regionMatches](boolean, int, String, int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#replace(java.lang.CharSequence,java.lang.CharSequence)[replace](CharSequence, CharSequence) -* String replaceAll(Pattern, Function) -* String replaceFirst(Pattern, Function) -* String[] splitOnToken(String) -* String[] splitOnToken(String, int) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String)[startsWith](String) -* boolean {java11-javadoc}/java.base/java/lang/String.html#startsWith(java.lang.String,int)[startsWith](String, int) -* CharSequence {java11-javadoc}/java.base/java/lang/CharSequence.html#subSequence(int,int)[subSequence](int, int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int)[substring](int) -* String {java11-javadoc}/java.base/java/lang/String.html#substring(int,int)[substring](int, int) -* char[] {java11-javadoc}/java.base/java/lang/String.html#toCharArray()[toCharArray]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase()[toLowerCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toLowerCase(java.util.Locale)[toLowerCase](Locale) -* String {java11-javadoc}/java.base/java/lang/CharSequence.html#toString()[toString]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase()[toUpperCase]() -* String {java11-javadoc}/java.base/java/lang/String.html#toUpperCase(java.util.Locale)[toUpperCase](Locale) -* String {java11-javadoc}/java.base/java/lang/String.html#trim()[trim]() - - diff --git a/docs/painless/painless-contexts.asciidoc b/docs/painless/painless-contexts.asciidoc deleted file mode 100644 index 53ef5009cbda4..0000000000000 --- a/docs/painless/painless-contexts.asciidoc +++ /dev/null @@ -1,59 +0,0 @@ -[[painless-contexts]] -== Painless contexts - -A Painless script is evaluated within a context. Each context has values that -are available as local variables, an allowlist that controls the available -classes, and the methods and fields within those classes (API), and -if and what type of value is returned. - -Painless scripts typically run within one of the contexts in the following -table. Before using a Painless context, <>. Each context example is configured to operate on this data. 
- -[options="header",cols="<1,<1,<1"] -|==== -| Name | Painless Documentation - | Elasticsearch Documentation -| Runtime field | <> - | {ref}/runtime.html[Elasticsearch Documentation] - -| Ingest processor | <> - | {ref}/script-processor.html[Elasticsearch Documentation] -| Update | <> - | {ref}/docs-update.html[Elasticsearch Documentation] -| Update by query | <> - | {ref}/docs-update-by-query.html[Elasticsearch Documentation] -| Reindex | <> - | {ref}/docs-reindex.html[Elasticsearch Documentation] -| Sort | <> - | {ref}/sort-search-results.html[Elasticsearch Documentation] -| Similarity | <> - | {ref}/index-modules-similarity.html[Elasticsearch Documentation] -| Weight | <> - | {ref}/index-modules-similarity.html[Elasticsearch Documentation] -| Score | <> - | {ref}/query-dsl-function-score-query.html[Elasticsearch Documentation] -| Field | <> - | {ref}/search-fields.html#script-fields[Elasticsearch Documentation] -| Filter | <> - | {ref}/query-dsl-script-query.html[Elasticsearch Documentation] -| Minimum should match | <> - | {ref}/query-dsl-terms-set-query.html[Elasticsearch Documentation] -| Metric aggregation initialization | <> - | {ref}/search-aggregations-metrics-scripted-metric-aggregation.html[Elasticsearch Documentation] -| Metric aggregation map | <> - | {ref}/search-aggregations-metrics-scripted-metric-aggregation.html[Elasticsearch Documentation] -| Metric aggregation combine | <> - | {ref}/search-aggregations-metrics-scripted-metric-aggregation.html[Elasticsearch Documentation] -| Metric aggregation reduce | <> - | {ref}/search-aggregations-metrics-scripted-metric-aggregation.html[Elasticsearch Documentation] -| Bucket script aggregation | <> - | {ref}/search-aggregations-pipeline-bucket-script-aggregation.html[Elasticsearch Documentation] -| Bucket selector aggregation | <> - | {ref}/search-aggregations-pipeline-bucket-selector-aggregation.html[Elasticsearch Documentation] -| Watcher condition | <> - | {ref}/condition-script.html[Elasticsearch Documentation] -| Watcher transform | <> - | {ref}/transform-script.html[Elasticsearch Documentation] -|==== - -include::painless-contexts/index.asciidoc[] diff --git a/docs/painless/painless-contexts/index.asciidoc b/docs/painless/painless-contexts/index.asciidoc deleted file mode 100644 index 94a0a2901abfc..0000000000000 --- a/docs/painless/painless-contexts/index.asciidoc +++ /dev/null @@ -1,43 +0,0 @@ -include::painless-context-examples.asciidoc[] - -include::painless-runtime-fields-context.asciidoc[] - -include::painless-ingest-processor-context.asciidoc[] - -include::painless-update-context.asciidoc[] - -include::painless-update-by-query-context.asciidoc[] - -include::painless-reindex-context.asciidoc[] - -include::painless-sort-context.asciidoc[] - -include::painless-similarity-context.asciidoc[] - -include::painless-weight-context.asciidoc[] - -include::painless-score-context.asciidoc[] - -include::painless-field-context.asciidoc[] - -include::painless-filter-context.asciidoc[] - -include::painless-min-should-match-context.asciidoc[] - -include::painless-metric-agg-init-context.asciidoc[] - -include::painless-metric-agg-map-context.asciidoc[] - -include::painless-metric-agg-combine-context.asciidoc[] - -include::painless-metric-agg-reduce-context.asciidoc[] - -include::painless-bucket-script-agg-context.asciidoc[] - -include::painless-bucket-selector-agg-context.asciidoc[] - -include::painless-analysis-predicate-context.asciidoc[] - -include::painless-watcher-condition-context.asciidoc[] - 
-include::painless-watcher-transform-context.asciidoc[] diff --git a/docs/painless/painless-contexts/painless-analysis-predicate-context.asciidoc b/docs/painless/painless-contexts/painless-analysis-predicate-context.asciidoc deleted file mode 100644 index 55d3818a3462a..0000000000000 --- a/docs/painless/painless-contexts/painless-analysis-predicate-context.asciidoc +++ /dev/null @@ -1,43 +0,0 @@ -[[painless-analysis-predicate-context]] -=== Analysis Predicate Context - -Use a painless script to determine whether or not the current token in an -analysis chain matches a predicate. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`token.term` (`CharSequence`, read-only):: - The characters of the current token - -`token.position` (`int`, read-only):: - The position of the current token - -`token.positionIncrement` (`int`, read-only):: - The position increment of the current token - -`token.positionLength` (`int`, read-only):: - The position length of the current token - -`token.startOffset` (`int`, read-only):: - The start offset of the current token - -`token.endOffset` (`int`, read-only):: - The end offset of the current token - -`token.type` (`String`, read-only):: - The type of the current token - -`token.keyword` (`boolean`, read-only):: - Whether or not the current token is marked as a keyword - -*Return* - -`boolean`:: - Whether or not the current token matches the predicate - -*API* - -The standard <> is available. diff --git a/docs/painless/painless-contexts/painless-bucket-script-agg-context.asciidoc b/docs/painless/painless-contexts/painless-bucket-script-agg-context.asciidoc deleted file mode 100644 index 5f0dc32305a3c..0000000000000 --- a/docs/painless/painless-contexts/painless-bucket-script-agg-context.asciidoc +++ /dev/null @@ -1,86 +0,0 @@ -[[painless-bucket-script-agg-context]] -=== Bucket script aggregation context - -Use a Painless script in an -{ref}/search-aggregations-pipeline-bucket-script-aggregation.html[`bucket_script` pipeline aggregation] -to calculate a value as a result in a bucket. - -==== Variables - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. The parameters - include values defined as part of the `buckets_path`. - -==== Return - -numeric:: - The calculated value as the result. - -==== API - -The standard <> is available. - -==== Example - -To run this example, first follow the steps in <>. - -The painless context in a `bucket_script` aggregation provides a `params` map. This map contains both -user-specified custom values, as well as the values from other aggregations specified in the `buckets_path` -property. - -This example takes the values from a min and max aggregation, calculates the difference, -and adds the user-specified base_cost to the result: - -[source,Painless] --------------------------------------------------- -(params.max - params.min) + params.base_cost --------------------------------------------------- - -Note that the values are extracted from the `params` map. 
In context, the aggregation looks like this: - -[source,console] --------------------------------------------------- -GET /seats/_search -{ - "size": 0, - "aggs": { - "theatres": { - "terms": { - "field": "theatre", - "size": 10 - }, - "aggs": { - "min_cost": { - "min": { - "field": "cost" - } - }, - "max_cost": { - "max": { - "field": "cost" - } - }, - "spread_plus_base": { - "bucket_script": { - "buckets_path": { <1> - "min": "min_cost", - "max": "max_cost" - }, - "script": { - "params": { - "base_cost": 5 <2> - }, - "source": "(params.max - params.min) + params.base_cost" - } - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:seats] - -<1> The `buckets_path` points to two aggregations (`min_cost`, `max_cost`) and adds `min`/`max` variables -to the `params` map -<2> The user-specified `base_cost` is also added to the script's `params` map \ No newline at end of file diff --git a/docs/painless/painless-contexts/painless-bucket-selector-agg-context.asciidoc b/docs/painless/painless-contexts/painless-bucket-selector-agg-context.asciidoc deleted file mode 100644 index fb7b24240e365..0000000000000 --- a/docs/painless/painless-contexts/painless-bucket-selector-agg-context.asciidoc +++ /dev/null @@ -1,85 +0,0 @@ - -[[painless-bucket-selector-agg-context]] -=== Bucket selector aggregation context - -Use a Painless script in an -{ref}/search-aggregations-pipeline-bucket-selector-aggregation.html[`bucket_selector` aggregation] -to determine if a bucket should be retained or filtered out. - -==== Variables - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. The parameters - include values defined as part of the `buckets_path`. - -==== Return - -boolean:: - True if the bucket should be retained, false if the bucket should be filtered out. - -==== API - -The standard <> is available. - -==== Example - - -To run this example, first follow the steps in <>. - -The painless context in a `bucket_selector` aggregation provides a `params` map. This map contains both -user-specified custom values, as well as the values from other aggregations specified in the `buckets_path` -property. - -Unlike some other aggregation contexts, the `bucket_selector` context must return a boolean `true` or `false`. - -This example finds the max of each bucket, adds a user-specified base_cost, and retains all of the -buckets that are greater than `10`. - -[source,Painless] --------------------------------------------------- -params.max + params.base_cost > 10 --------------------------------------------------- - -Note that the values are extracted from the `params` map. The script is in the form of an expression -that returns `true` or `false`. 
In context, the aggregation looks like this: - -[source,console] --------------------------------------------------- -GET /seats/_search -{ - "size": 0, - "aggs": { - "theatres": { - "terms": { - "field": "theatre", - "size": 10 - }, - "aggs": { - "max_cost": { - "max": { - "field": "cost" - } - }, - "filtering_agg": { - "bucket_selector": { - "buckets_path": { <1> - "max": "max_cost" - }, - "script": { - "params": { - "base_cost": 5 <2> - }, - "source": "params.max + params.base_cost > 10" - } - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:seats] - -<1> The `buckets_path` points to the max aggregations (`max_cost`) and adds `max` variables -to the `params` map -<2> The user-specified `base_cost` is also added to the `params` map diff --git a/docs/painless/painless-contexts/painless-context-examples.asciidoc b/docs/painless/painless-contexts/painless-context-examples.asciidoc deleted file mode 100644 index e244425087c06..0000000000000 --- a/docs/painless/painless-contexts/painless-context-examples.asciidoc +++ /dev/null @@ -1,109 +0,0 @@ -[[painless-context-examples]] -=== Context example data - -Complete the following steps to index the `seat` sample data into {es}. You can -run any of the context examples against this sample data after you configure it. - -Each document in the `seat` data contains the following fields: - -`theatre` ({ref}/keyword.html[`keyword`]):: - The name of the theater the play is in. -`play` ({ref}/text.html[`keyword`]):: - The name of the play. -`actors` ({ref}/text.html[`keyword`]):: - A list of actors in the play. -`date` ({ref}/keyword.html[`keyword`]):: - The date of the play as a keyword. -`time` ({ref}/keyword.html[`keyword`]):: - The time of the play as a keyword. -`cost` ({ref}/number.html[`long`]):: - The cost of the ticket for the seat. -`row` ({ref}/number.html[`long`]):: - The row of the seat. -`number` ({ref}/number.html[`long`]):: - The number of the seat within a row. -`sold` ({ref}/boolean.html[`boolean`]):: - Whether or not the seat is sold. -`datetime` ({ref}/date.html[`date`]):: - The date and time of the play as a date object. - -==== Prerequisites -Start an {ref}/configuring-stack-security.html[{es} instance], and then access the -{kibana-ref}/console-kibana.html[Console] in {kib}. - -==== Configure the `seat` sample data -. From the {kib} Console, create {ref}/mapping.html[mappings] for the sample -data: -+ -[source,console] ----- -PUT /seats -{ - "mappings": { - "properties": { - "theatre": { "type": "keyword" }, - "play": { "type": "keyword" }, - "actors": { "type": "keyword" }, - "date": { "type": "keyword" }, - "time": { "type": "keyword" }, - "cost": { "type": "double" }, - "row": { "type": "integer" }, - "number": { "type": "integer" }, - "sold": { "type": "boolean" }, - "datetime": { "type": "date" } - } - } -} ----- -+ - -. Configure a script ingest processor that parses each document as {es} ingests -the `seat` data. 
The following ingest script processes the `date` and `time` -fields and stores the result in a `datetime` field: -+ -[source,console] ----- -PUT /_ingest/pipeline/seats -{ - "description": "update datetime for seats", - "processors": [ - { - "script": { - "source": "String[] dateSplit = ctx.date.splitOnToken('-'); String year = dateSplit[0].trim(); String month = dateSplit[1].trim(); if (month.length() == 1) { month = '0' + month; } String day = dateSplit[2].trim(); if (day.length() == 1) { day = '0' + day; } boolean pm = ctx.time.substring(ctx.time.length() - 2).equals('PM'); String[] timeSplit = ctx.time.substring(0, ctx.time.length() - 2).splitOnToken(':'); int hours = Integer.parseInt(timeSplit[0].trim()); int minutes = Integer.parseInt(timeSplit[1].trim()); if (pm) { hours += 12; } String dts = year + '-' + month + '-' + day + 'T' + (hours < 10 ? '0' + hours : '' + hours) + ':' + (minutes < 10 ? '0' + minutes : '' + minutes) + ':00+08:00'; ZonedDateTime dt = ZonedDateTime.parse(dts, DateTimeFormatter.ISO_OFFSET_DATE_TIME); ctx.datetime = dt.getLong(ChronoField.INSTANT_SECONDS)*1000L;" - } - } - ] -} ----- -// TEST[continued] - -. Ingest some sample data using the `seats` ingest pipeline that you defined in -the previous step. -+ -[source,console] ----- -POST seats/_bulk?pipeline=seats&refresh=true -{"create":{"_index":"seats","_id":"1"}} -{"theatre":"Skyline","play":"Rent","actors":["James Holland","Krissy Smith","Joe Muir","Ryan Earns"],"date":"2021-4-1","time":"3:00PM","cost":37,"row":1,"number":7,"sold":false} -{"create":{"_index":"seats","_id":"2"}} -{"theatre":"Graye","play":"Rent","actors":["Dave Christmas"],"date":"2021-4-1","time":"3:00PM","cost":30,"row":3,"number":5,"sold":false} -{"create":{"_index":"seats","_id":"3"}} -{"theatre":"Graye","play":"Rented","actors":["Dave Christmas"],"date":"2021-4-1","time":"3:00PM","cost":33,"row":2,"number":6,"sold":false} -{"create":{"_index":"seats","_id":"4"}} -{"theatre":"Skyline","play":"Rented","actors":["James Holland","Krissy Smith","Joe Muir","Ryan Earns"],"date":"2021-4-1","time":"3:00PM","cost":20,"row":5,"number":2,"sold":false} -{"create":{"_index":"seats","_id":"5"}} -{"theatre":"Down Port","play":"Pick It Up","actors":["Joel Madigan","Jessica Brown","Baz Knight","Jo Hangum","Rachel Grass","Phoebe Miller"],"date":"2018-4-2","time":"8:00PM","cost":27.5,"row":3,"number":2,"sold":false} -{"create":{"_index":"seats","_id":"6"}} -{"theatre":"Down Port","play":"Harriot","actors":["Phoebe Miller","Sarah Notch","Brayden Green","Joshua Iller","Jon Hittle","Rob Kettleman","Laura Conrad","Simon Hower","Nora Blue","Mike Candlestick","Jacey Bell"],"date":"2018-8-7","time":"8:00PM","cost":30,"row":1,"number":10,"sold":false} -{"create":{"_index":"seats","_id":"7"}} -{"theatre":"Skyline","play":"Auntie Jo","actors":["Jo Hangum","Jon Hittle","Rob Kettleman","Laura Conrad","Simon Hower","Nora Blue"],"date":"2018-10-2","time":"5:40PM","cost":22.5,"row":7,"number":10,"sold":false} -{"create":{"_index":"seats","_id":"8"}} -{"theatre":"Skyline","play":"Test Run","actors":["Joe Muir","Ryan Earns","Joel Madigan","Jessica Brown"],"date":"2018-8-5","time":"7:30PM","cost":17.5,"row":11,"number":12,"sold":true} -{"create":{"_index":"seats","_id":"9"}} -{"theatre":"Skyline","play":"Sunnyside Down","actors":["Krissy Smith","Joe Muir","Ryan Earns","Nora Blue","Mike Candlestick","Jacey Bell"],"date":"2018-6-12","time":"4:00PM","cost":21.25,"row":8,"number":15,"sold":true} -{"create":{"_index":"seats","_id":"10"}} -{"theatre":"Graye","play":"Line and 
Single","actors":["Nora Blue","Mike Candlestick"],"date":"2018-6-5","time":"2:00PM","cost":30,"row":1,"number":2,"sold":false} -{"create":{"_index":"seats","_id":"11"}} -{"theatre":"Graye","play":"Hamilton","actors":["Lin-Manuel Miranda","Leslie Odom Jr."],"date":"2018-6-5","time":"2:00PM","cost":5000,"row":1,"number":20,"sold":true} ----- -// TEST[continued] diff --git a/docs/painless/painless-contexts/painless-field-context.asciidoc b/docs/painless/painless-contexts/painless-field-context.asciidoc deleted file mode 100644 index 661af8e64d1e0..0000000000000 --- a/docs/painless/painless-contexts/painless-field-context.asciidoc +++ /dev/null @@ -1,135 +0,0 @@ -[[painless-field-context]] -=== Field context - -Use a Painless script to create a -{ref}/search-fields.html#script-fields[script field] to return -a customized value for each document in the results of a query. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`doc` (`Map`, read-only):: - Contains the fields of the specified document where each field is a - `List` of values. - -{ref}/mapping-source-field.html[`params['_source']`] (`Map`, read-only):: - Contains extracted JSON in a `Map` and `List` structure for the fields - existing in a stored document. - -*Return* - -`Object`:: - The customized value for each document. - -*API* - -Both the standard <> and -<> are available. - - -*Example* - -To run this example, first follow the steps in -<>. - -You can then use these two example scripts to compute custom information -for each search hit and output it to two new fields. - -The first script gets the doc value for the `datetime` field and calls -the `getDayOfWeekEnum` function to determine the corresponding day of the week. - -[source,Painless] ----- -doc['datetime'].value.getDayOfWeekEnum().getDisplayName(TextStyle.FULL, Locale.ROOT) ----- - -The second script calculates the number of actors. Actors' names are stored -as a keyword array in the `actors` field. - -[source,Painless] ----- -doc['actors'].size() <1> ----- - -<1> By default, doc values are not available for `text` fields. If `actors` was -a `text` field, you could still calculate the number of actors by extracting -values from `_source` with `params['_source']['actors'].size()`. 
- -The following request returns the calculated day of week and the number of -actors that appear in each play: - -[source,console] ----- -GET seats/_search -{ - "size": 2, - "query": { - "match_all": {} - }, - "script_fields": { - "day-of-week": { - "script": { - "source": "doc['datetime'].value.getDayOfWeekEnum().getDisplayName(TextStyle.FULL, Locale.ENGLISH)" - } - }, - "number-of-actors": { - "script": { - "source": "doc['actors'].size()" - } - } - } -} ----- -// TEST[setup:seats] - -[source,console-result] ----- -{ - "took" : 68, - "timed_out" : false, - "_shards" : { - "total" : 1, - "successful" : 1, - "skipped" : 0, - "failed" : 0 - }, - "hits" : { - "total" : { - "value" : 11, - "relation" : "eq" - }, - "max_score" : 1.0, - "hits" : [ - { - "_index" : "seats", - "_id" : "1", - "_score" : 1.0, - "fields" : { - "day-of-week" : [ - "Thursday" - ], - "number-of-actors" : [ - 4 - ] - } - }, - { - "_index" : "seats", - "_id" : "2", - "_score" : 1.0, - "fields" : { - "day-of-week" : [ - "Thursday" - ], - "number-of-actors" : [ - 1 - ] - } - } - ] - } -} ----- -// TESTRESPONSE[s/"took" : 68/"took" : "$body.took"/] diff --git a/docs/painless/painless-contexts/painless-filter-context.asciidoc b/docs/painless/painless-contexts/painless-filter-context.asciidoc deleted file mode 100644 index 488a621991c89..0000000000000 --- a/docs/painless/painless-contexts/painless-filter-context.asciidoc +++ /dev/null @@ -1,64 +0,0 @@ -[[painless-filter-context]] -=== Filter context - -Use a Painless script as a {ref}/query-dsl-script-query.html[filter] in a -query to include and exclude documents. - - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`doc` (`Map`, read-only):: - Contains the fields of the current document where each field is a - `List` of values. - -*Return* - -`boolean`:: - Return `true` if the current document should be returned as a result of - the query, and `false` otherwise. - - -*API* - -The standard <> is available. - -*Example* - -To run this example, first follow the steps in -<>. - -This script finds all unsold documents that cost less than $25. - -[source,Painless] ----- -doc['sold'].value == false && doc['cost'].value < 25 ----- - -Defining `cost` as a script parameter enables the cost to be configured -in the script query request. For example, the following request finds -all available theatre seats for evening performances that are under $25. - -[source,console] ----- -GET seats/_search -{ - "query": { - "bool": { - "filter": { - "script": { - "script": { - "source": "doc['sold'].value == false && doc['cost'].value < params.cost", - "params": { - "cost": 25 - } - } - } - } - } - } -} ----- -// TEST[setup:seats] diff --git a/docs/painless/painless-contexts/painless-ingest-processor-context.asciidoc b/docs/painless/painless-contexts/painless-ingest-processor-context.asciidoc deleted file mode 100644 index 0e9001f84feac..0000000000000 --- a/docs/painless/painless-contexts/painless-ingest-processor-context.asciidoc +++ /dev/null @@ -1,138 +0,0 @@ -[[painless-ingest-processor-context]] -=== Ingest processor context - -Use a Painless script in an {ref}/script-processor.html[ingest processor] -to modify documents upon insertion. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -{ref}/mapping-index-field.html[`ctx['_index']`] (`String`):: - The name of the index. 
- -`ctx` (`Map`):: - Contains extracted JSON in a `Map` and `List` structure for the fields - that are part of the document. - -*Side Effects* - -{ref}/mapping-index-field.html[`ctx['_index']`]:: - Modify this to change the destination index for the current document. - -`ctx` (`Map`):: - Modify the values in the `Map/List` structure to add, modify, or delete - the fields of a document. - -*Return* - -void:: - No expected return value. - -*API* - -Both the standard <> and -<> are available. - -*Example* - -To run this example, first follow the steps in -<>. - -The seat data contains: - -* A date in the format `YYYY-MM-DD` where the second digit of both month and day - is optional. -* A time in the format HH:MM* where the second digit of both hours and minutes - is optional. The star (*) represents either the `String` `AM` or `PM`. - -The following ingest script processes the date and time `Strings` and stores the -result in a `datetime` field. - -[source,Painless] ----- -String[] dateSplit = ctx.date.splitOnToken("-"); <1> -String year = dateSplit[0].trim(); -String month = dateSplit[1].trim(); - -if (month.length() == 1) { <2> - month = "0" + month; -} - -String day = dateSplit[2].trim(); - -if (day.length() == 1) { <3> - day = "0" + day; -} - -boolean pm = ctx.time.substring(ctx.time.length() - 2).equals("PM"); <4> -String[] timeSplit = ctx.time.substring(0, - ctx.time.length() - 2).splitOnToken(":"); <5> -int hours = Integer.parseInt(timeSplit[0].trim()); -int minutes = Integer.parseInt(timeSplit[1].trim()); - -if (pm) { <6> - hours += 12; -} - -String dts = year + "-" + month + "-" + day + "T" + - (hours < 10 ? "0" + hours : "" + hours) + ":" + - (minutes < 10 ? "0" + minutes : "" + minutes) + - ":00+08:00"; <7> - -ZonedDateTime dt = ZonedDateTime.parse( - dts, DateTimeFormatter.ISO_OFFSET_DATE_TIME); <8> -ctx.datetime = dt.getLong(ChronoField.INSTANT_SECONDS)*1000L; <9> ----- -<1> Uses the `splitOnToken` function to separate the date `String` from the - seat data into year, month, and day `Strings`. - Note:: - * The use of the `ctx` ingest processor context variable to retrieve the - data from the `date` field. -<2> Appends the <> `"0"` value to a single - digit month since the format of the seat data allows for this case. -<3> Appends the <> `"0"` value to a single - digit day since the format of the seat data allows for this case. -<4> Sets the <> - <> to `true` if the time `String` is a time - in the afternoon or evening. - Note:: - * The use of the `ctx` ingest processor context variable to retrieve the - data from the `time` field. -<5> Uses the `splitOnToken` function to separate the time `String` from the - seat data into hours and minutes `Strings`. - Note:: - * The use of the `substring` method to remove the `AM` or `PM` portion of - the time `String`. - * The use of the `ctx` ingest processor context variable to retrieve the - data from the `date` field. -<6> If the time `String` is an afternoon or evening value adds the - <> `12` to the existing hours to move to - a 24-hour based time. -<7> Builds a new time `String` that is parsable using existing API methods. -<8> Creates a `ZonedDateTime` <> value by using - the API method `parse` to parse the new time `String`. -<9> Sets the datetime field `datetime` to the number of milliseconds retrieved - from the API method `getLong`. - Note:: - * The use of the `ctx` ingest processor context variable to set the field - `datetime`. Manipulate each document's fields with the `ctx` variable as - each document is indexed. 
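The callouts above all come down to reading from and writing to the `ctx` map. As a hedged, minimal sketch of that pattern in isolation (the `date_and_time` field name is illustrative and not part of the sample data):

[source,Painless]
----
// Read two source fields from the document being ingested and write a new
// field back onto it; every change made to ctx is persisted with the document.
String d = ctx.date;
String t = ctx.time;
ctx.date_and_time = d + ' ' + t;
----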
- -Submit the following request: - -[source,console] ----- -PUT /_ingest/pipeline/seats -{ - "description": "update datetime for seats", - "processors": [ - { - "script": { - "source": "String[] dateSplit = ctx.date.splitOnToken('-'); String year = dateSplit[0].trim(); String month = dateSplit[1].trim(); if (month.length() == 1) { month = '0' + month; } String day = dateSplit[2].trim(); if (day.length() == 1) { day = '0' + day; } boolean pm = ctx.time.substring(ctx.time.length() - 2).equals('PM'); String[] timeSplit = ctx.time.substring(0, ctx.time.length() - 2).splitOnToken(':'); int hours = Integer.parseInt(timeSplit[0].trim()); int minutes = Integer.parseInt(timeSplit[1].trim()); if (pm) { hours += 12; } String dts = year + '-' + month + '-' + day + 'T' + (hours < 10 ? '0' + hours : '' + hours) + ':' + (minutes < 10 ? '0' + minutes : '' + minutes) + ':00+08:00'; ZonedDateTime dt = ZonedDateTime.parse(dts, DateTimeFormatter.ISO_OFFSET_DATE_TIME); ctx.datetime = dt.getLong(ChronoField.INSTANT_SECONDS)*1000L;" - } - } - ] -} ----- diff --git a/docs/painless/painless-contexts/painless-metric-agg-combine-context.asciidoc b/docs/painless/painless-contexts/painless-metric-agg-combine-context.asciidoc deleted file mode 100644 index 2d5edf6ab4cd8..0000000000000 --- a/docs/painless/painless-contexts/painless-metric-agg-combine-context.asciidoc +++ /dev/null @@ -1,27 +0,0 @@ -[[painless-metric-agg-combine-context]] -=== Metric aggregation combine context - -Use a Painless script to -{ref}/search-aggregations-metrics-scripted-metric-aggregation.html[combine] -values for use in a scripted metric aggregation. A combine script is run once -per shard following a <> and is -optional as part of a full metric aggregation. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`state` (`Map`):: - `Map` with values available from the prior map script. - -*Return* - -`List`, `Map`, `String`, or primitive:: - A value collected for use in a - <>. If no reduce - script is specified, the value is used as part of the result. - -*API* - -The standard <> is available. diff --git a/docs/painless/painless-contexts/painless-metric-agg-init-context.asciidoc b/docs/painless/painless-contexts/painless-metric-agg-init-context.asciidoc deleted file mode 100644 index 2d40fcf427a4d..0000000000000 --- a/docs/painless/painless-contexts/painless-metric-agg-init-context.asciidoc +++ /dev/null @@ -1,32 +0,0 @@ -[[painless-metric-agg-init-context]] -=== Metric aggregation initialization context - -Use a Painless script to -{ref}/search-aggregations-metrics-scripted-metric-aggregation.html[initialize] -values for use in a scripted metric aggregation. An initialization script is -run prior to document collection once per shard and is optional as part of the -full metric aggregation. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`state` (`Map`):: - Empty `Map` used to add values for use in a - <>. - -*Side Effects* - -`state` (`Map`):: - Add values to this `Map` to for use in a map. Additional values must - be of the type `Map`, `List`, `String` or primitive. - -*Return* - -`void`:: - No expected return value. - -*API* - -The standard <> is available. 
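Neither of the two scripted metric aggregation pages above carries an example, so the following is a hedged, minimal sketch of an init and combine script pair over the sample data. The `state.costs` name is illustrative, and the map and reduce scripts that would complete the aggregation are omitted:

[source,Painless]
----
// init_script: runs once per shard before any documents are collected;
// seed the shared state with an empty list.
state.costs = [];
----

[source,Painless]
----
// combine_script: runs once per shard after collection; fold whatever numeric
// values the map script appended to state.costs into one per-shard value.
double total = 0;
for (def cost : state.costs) {
    total += cost;
}
return total;
----

The per-shard value returned here would then be summed again by a reduce script on the coordinating node.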
diff --git a/docs/painless/painless-contexts/painless-metric-agg-map-context.asciidoc b/docs/painless/painless-contexts/painless-metric-agg-map-context.asciidoc deleted file mode 100644 index 4c7ef36ddace6..0000000000000 --- a/docs/painless/painless-contexts/painless-metric-agg-map-context.asciidoc +++ /dev/null @@ -1,47 +0,0 @@ -[[painless-metric-agg-map-context]] -=== Metric aggregation map context - -Use a Painless script to -{ref}/search-aggregations-metrics-scripted-metric-aggregation.html[map] -values for use in a scripted metric aggregation. A map script is run once per -collected document following an optional -<> and is required as -part of a full metric aggregation. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`state` (`Map`):: - `Map` used to add values for processing in a - <> or to be returned from the aggregation. - -`doc` (`Map`, read-only):: - Contains the fields of the current document where each field is a - `List` of values. - -`_score` (`double` read-only):: - The similarity score of the current document. - -*Side Effects* - -`state` (`Map`):: - Use this `Map` to add values for processing in a combine script. - Additional values must be of the type `Map`, `List`, `String` or - primitive. The same `state` `Map` is shared between all aggregated documents - on a given shard. If an initialization script is provided as part of the - aggregation then values added from the initialization script are - available. If no combine script is specified, values must be - directly stored in `state` in a usable form. If no combine script and no - <> are specified, the - `state` values are used as the result. - -*Return* - -`void`:: - No expected return value. - -*API* - -The standard <> is available. diff --git a/docs/painless/painless-contexts/painless-metric-agg-reduce-context.asciidoc b/docs/painless/painless-contexts/painless-metric-agg-reduce-context.asciidoc deleted file mode 100644 index ba6b6dabdc924..0000000000000 --- a/docs/painless/painless-contexts/painless-metric-agg-reduce-context.asciidoc +++ /dev/null @@ -1,28 +0,0 @@ -[[painless-metric-agg-reduce-context]] -=== Metric aggregation reduce context - -Use a Painless script to -{ref}/search-aggregations-metrics-scripted-metric-aggregation.html[reduce] -values to produce the result of a scripted metric aggregation. A reduce script -is run once on the coordinating node following a -<> (or a -<> if no combine script is -specified) and is optional as part of a full metric aggregation. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`states` (`Map`):: - `Map` with values available from the prior combine script (or a map - script if no combine script is specified). - -*Return* - -`List`, `Map`, `String`, or primitive:: - A value used as the result. - -*API* - -The standard <> is available. diff --git a/docs/painless/painless-contexts/painless-min-should-match-context.asciidoc b/docs/painless/painless-contexts/painless-min-should-match-context.asciidoc deleted file mode 100644 index 9d1c70b5bc95a..0000000000000 --- a/docs/painless/painless-contexts/painless-min-should-match-context.asciidoc +++ /dev/null @@ -1,80 +0,0 @@ -[[painless-min-should-match-context]] -=== Minimum should match context - -Use a Painless script to specify the -{ref}/query-dsl-terms-set-query.html[minimum] number of terms that a -specified field needs to match with for a document to be part of the query -results. 
- -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`params['num_terms']` (`int`, read-only):: - The number of terms specified to match with. - -`doc` (`Map`, read-only):: - Contains the fields of the current document where each field is a - `List` of values. - -*Return* - -`int`:: - The minimum number of terms required to match the current document. - -*API* - -The standard <> is available. - -*Example* - -To run this example, first follow the steps in -<>. - -Imagine that you want to find seats to performances by your favorite -actors. You have a list of favorite actors in mind, and you want -to find performances where the cast includes at least a certain -number of them. - -To achieve this result, use a `terms_set` query with -`minimum_should_match_script`. To make the query request more configurable, -you can define `min_actors_to_see` as a script parameter. - -To ensure that the parameter `min_actors_to_see` doesn't exceed -the number of favorite actors, you can use `num_terms` to get -the number of actors in the list and `Math.min` to get the lesser -of the two. - -[source,Painless] ----- -Math.min(params['num_terms'], params['min_actors_to_see']) ----- - -The following request finds seats to performances with at least -two of the three specified actors. - -[source,console] ----- -GET seats/_search -{ - "query": { - "terms_set": { - "actors": { - "terms": [ - "smith", - "earns", - "black" - ], - "minimum_should_match_script": { - "source": "Math.min(params['num_terms'], params['min_actors_to_see'])", - "params": { - "min_actors_to_see": 2 - } - } - } - } - } -} ----- -// TEST[setup:seats] diff --git a/docs/painless/painless-contexts/painless-reindex-context.asciidoc b/docs/painless/painless-contexts/painless-reindex-context.asciidoc deleted file mode 100644 index 9aae1ae70c5ac..0000000000000 --- a/docs/painless/painless-contexts/painless-reindex-context.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -[[painless-reindex-context]] -=== Reindex context - -Use a Painless script in a {ref}/docs-reindex.html[reindex] operation to -add, modify, or delete fields within each document in an original index as its -reindexed into a target index. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`ctx['op']` (`String`):: - The name of the operation. - -{ref}/mapping-routing-field.html[`ctx['_routing']`] (`String`):: - The value used to select a shard for document storage. - -{ref}/mapping-index-field.html[`ctx['_index']`] (`String`):: - The name of the index. - -{ref}/mapping-id-field.html[`ctx['_id']`] (`String`):: - The unique document id. - -`ctx['_version']` (`int`):: - The current version of the document. - -{ref}/mapping-source-field.html[`ctx['_source']`] (`Map`):: - Contains extracted JSON in a `Map` and `List` structure for the fields - existing in a stored document. - -*Side Effects* - -`ctx['op']`:: - Use the default of `index` to update a document. Set to `noop` to - specify no operation or `delete` to delete the current document from - the index. - -{ref}/mapping-routing-field.html[`ctx['_routing']`]:: - Modify this to change the routing value for the current document. - -{ref}/mapping-index-field.html[`ctx['_index']`]:: - Modify this to change the destination index for the current document. - -{ref}/mapping-id-field.html[`ctx['_id']`]:: - Modify this to change the id for the current document. 
- -`ctx['_version']` (`int`):: - Modify this to modify the version for the current document. - -{ref}/mapping-source-field.html[`ctx['_source']`]:: - Modify the values in the `Map/List` structure to add, modify, or delete - the fields of a document. - -*Return* - -`void`:: - No expected return value. - -*API* - -The standard <> is available. diff --git a/docs/painless/painless-contexts/painless-runtime-fields-context.asciidoc b/docs/painless/painless-contexts/painless-runtime-fields-context.asciidoc deleted file mode 100644 index 592e0b9c2ca6a..0000000000000 --- a/docs/painless/painless-contexts/painless-runtime-fields-context.asciidoc +++ /dev/null @@ -1,188 +0,0 @@ -[[painless-runtime-fields-context]] -=== Runtime fields context -Use a Painless script to calculate and emit -<> values. - -See the {ref}/runtime.html[runtime fields] documentation for more information -about how to use runtime fields. - -*Methods* - --- -[[runtime-emit-method]] -// tag::runtime-field-emit[] -`emit`:: (Required) - Accepts the values from the script valuation. Scripts can call the - `emit` method multiple times to emit multiple values. -+ -The `emit` method applies only to scripts used in a -<>. -+ -IMPORTANT: The `emit` method cannot accept `null` values. Do not call this -method if the referenced fields do not have any values. -+ -.Signatures of `emit` -[%collapsible%open] -==== -The signature for `emit` depends on the `type` of the field. - -[horizontal] -`boolean`:: `emit(boolean)` -`date`:: `emit(long)` -`double`:: `emit(double)` -`geo_point`:: `emit(double lat, double lon)` -`ip`:: `emit(String)` -`long`:: `emit(long)` -`keyword`:: `emit(String)` -==== -// end::runtime-field-emit[] --- - --- -`grok`:: - Defines a {ref}/grok-processor.html[grok pattern] to extract structured fields out of a single text field within a document. A grok pattern is like a regular expression that supports aliased expressions that can be reused. See {ref}/runtime-examples.html#runtime-examples-grok[Define a runtime field with a grok pattern]. -+ -.Properties of `grok` -[%collapsible%open] -==== -`extract`:: - Indicates the values to return. This method applies only to `grok` and - `dissect` methods. -==== --- - --- -`dissect`:: - Defines a {ref}/dissect-processor.html[dissect pattern]. Dissect operates much like grok, but does not accept regular expressions. See {ref}/runtime-examples.html#runtime-examples-dissect[Define a runtime field with a dissect pattern]. -+ -.Properties of `dissect` -[%collapsible%open] -==== -`extract`:: - Indicates the values to return. This method applies only to `grok` and - `dissect` methods. -==== --- - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`doc` (`Map`, read-only):: - Contains the fields of the specified document where each field is a - `List` of values. - -{ref}/mapping-source-field.html[`params['_source']`] (`Map`, read-only):: - Contains extracted JSON in a `Map` and `List` structure for the fields - existing in a stored document. - -*Return* - -`void`:: - No expected return value. - -*API* - -Both the standard <> and -<> are available. - - -*Example* - -To run the examples, first follow the steps in -<>. - -Then, run the following request to define a runtime field named `day_of_week`. -This field contains a script with the same `source` defined in -<>, but also uses an `emit` function -that runtime fields require when defining a Painless script. 
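As a hedged aside on the `emit` requirement just mentioned (the earlier `IMPORTANT` note says `emit` must never receive `null`), a defensive variant of the script could check the doc values first. This sketch is illustrative and not part of the original mapping:

[source,Painless]
----
// Only emit when the datetime field has a value for this document, since
// calling emit with null is not allowed in the runtime fields context.
if (doc['datetime'].size() > 0) {
    emit(doc['datetime'].value.getDayOfWeekEnum().toString());
}
----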
- -Because `day_of_week` is a runtime field, it isn't indexed, and the included -script only runs at query time: - -[source,console] ----- -PUT seats/_mapping -{ - "runtime": { - "day_of_week": { - "type": "keyword", - "script": { - "source": "emit(doc['datetime'].value.getDayOfWeekEnum().toString())" - } - } - } -} ----- -// TEST[setup:seats] - -After defining the runtime field and script in the mappings, you can run a -query that includes a terms aggregation for `day_of_week`. When the query runs, -{es} evaluates the included Painless script and dynamically generates a value -based on the script definition: - -[source,console] ----- -GET seats/_search -{ - "size": 0, - "fields": [ - "time", - "day_of_week" - ], - "aggs": { - "day_of_week": { - "terms": { - "field": "day_of_week", - "size": 10 - } - } - } -} ----- -// TEST[continued] - -The response includes `day_of_week` for each hit. {es} calculates the value for -this field dynamically at search time by operating on the `datetime` field -defined in the mappings. - -[source,console-result] ----- -{ - ... - "hits" : { - "total" : { - "value" : 11, - "relation" : "eq" - }, - "max_score" : null, - "hits" : [ ] - }, - "aggregations" : { - "day_of_week" : { - "doc_count_error_upper_bound" : 0, - "sum_other_doc_count" : 0, - "buckets" : [ - { - "key" : "TUESDAY", - "doc_count" : 5 - }, - { - "key" : "THURSDAY", - "doc_count" : 4 - }, - { - "key" : "MONDAY", - "doc_count" : 1 - }, - { - "key" : "SUNDAY", - "doc_count" : 1 - } - ] - } - } -} ----- -// TESTRESPONSE[s/\.\.\./"took" : $body.took,"timed_out" : $body.timed_out,"_shards" : $body._shards,/] diff --git a/docs/painless/painless-contexts/painless-score-context.asciidoc b/docs/painless/painless-contexts/painless-score-context.asciidoc deleted file mode 100644 index 72fc86b0f9b7c..0000000000000 --- a/docs/painless/painless-contexts/painless-score-context.asciidoc +++ /dev/null @@ -1,60 +0,0 @@ -[[painless-score-context]] -=== Score context - -Use a Painless script in a -{ref}/query-dsl-function-score-query.html[function score] to apply a new -score to documents returned from a query. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`doc` (`Map`, read-only):: - Contains the fields of the current document. For single-valued fields, - the value can be accessed via `doc['fieldname'].value`. For multi-valued - fields, this returns the first value; other values can be accessed - via `doc['fieldname'].get(index)` - -`_score` (`double` read-only):: - The similarity score of the current document. - -*Return* - -`double`:: - The score for the current document. - -*API* - -Both the standard <> and -<> are available. - -*Example* - -To run this example, first follow the steps in -<>. - -The following query finds all unsold seats, with lower 'row' values -scored higher. 
- -[source,console] --------------------------------------------------- -GET /seats/_search -{ - "query": { - "function_score": { - "query": { - "match": { - "sold": "false" - } - }, - "script_score": { - "script": { - "source": "1.0 / doc['row'].value" - } - } - } - } -} --------------------------------------------------- -// TEST[setup:seats] \ No newline at end of file diff --git a/docs/painless/painless-contexts/painless-similarity-context.asciidoc b/docs/painless/painless-contexts/painless-similarity-context.asciidoc deleted file mode 100644 index 1e73860ec8daf..0000000000000 --- a/docs/painless/painless-contexts/painless-similarity-context.asciidoc +++ /dev/null @@ -1,59 +0,0 @@ -[[painless-similarity-context]] -=== Similarity context - -Use a Painless script to create a -{ref}/index-modules-similarity.html[similarity] equation for scoring -documents in a query. - -*Variables* - -`weight` (`float`, read-only):: - The weight as calculated by a <> - -`query.boost` (`float`, read-only):: - The boost value if provided by the query. If this is not provided the - value is `1.0f`. - -`field.docCount` (`long`, read-only):: - The number of documents that have a value for the current field. - -`field.sumDocFreq` (`long`, read-only):: - The sum of all terms that exist for the current field. If this is not - available the value is `-1`. - -`field.sumTotalTermFreq` (`long`, read-only):: - The sum of occurrences in the index for all the terms that exist in the - current field. If this is not available the value is `-1`. - -`term.docFreq` (`long`, read-only):: - The number of documents that contain the current term in the index. - -`term.totalTermFreq` (`long`, read-only):: - The total occurrences of the current term in the index. - -`doc.length` (`long`, read-only):: - The number of tokens the current document has in the current field. This - is decoded from the stored {ref}/norms.html[norms] and may be approximate for - long fields - -`doc.freq` (`long`, read-only):: - The number of occurrences of the current term in the current - document for the current field. - -Note that the `query`, `field`, and `term` variables are also available to the -<>. They are more efficiently used -there, as they are constant for all documents. - -For queries that contain multiple terms, the script is called once for each -term with that term's calculated weight, and the results are summed. Note that some -terms might have a `doc.freq` value of `0` on a document, for example if a query -uses synonyms. - -*Return* - -`double`:: - The similarity score for the current document. - -*API* - -The standard <> is available. \ No newline at end of file diff --git a/docs/painless/painless-contexts/painless-sort-context.asciidoc b/docs/painless/painless-contexts/painless-sort-context.asciidoc deleted file mode 100644 index 4e8ec3e2787da..0000000000000 --- a/docs/painless/painless-contexts/painless-sort-context.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -[[painless-sort-context]] -=== Sort context - -Use a Painless script to -{ref}/sort-search-results.html[sort] the documents in a query. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`doc` (`Map`, read-only):: - Contains the fields of the current document. For single-valued fields, - the value can be accessed via `doc['fieldname'].value`. 
For multi-valued - fields, this returns the first value; other values can be accessed - via `doc['fieldname'].get(index)` - -`_score` (`double` read-only):: - The similarity score of the current document. - -*Return* - -`double` or `String`:: - The sort key. The return type depends on the value of the `type` parameter - in the script sort config (`"number"` or `"string"`). - -*API* - -The standard <> is available. - -*Example* - -To run this example, first follow the steps in -<>. - -To sort results by the length of the `theatre` field, submit the following query: - -[source,console] ----- -GET /_search -{ - "query": { - "term": { - "sold": "true" - } - }, - "sort": { - "_script": { - "type": "number", - "script": { - "lang": "painless", - "source": "doc['theatre'].value.length() * params.factor", - "params": { - "factor": 1.1 - } - }, - "order": "asc" - } - } -} ----- -// TEST[setup:seats] diff --git a/docs/painless/painless-contexts/painless-update-by-query-context.asciidoc b/docs/painless/painless-contexts/painless-update-by-query-context.asciidoc deleted file mode 100644 index 78a8b8d36d6bb..0000000000000 --- a/docs/painless/painless-contexts/painless-update-by-query-context.asciidoc +++ /dev/null @@ -1,92 +0,0 @@ -[[painless-update-by-query-context]] -=== Update by query context - -Use a Painless script in an -{ref}/docs-update-by-query.html[update by query] operation to add, -modify, or delete fields within each of a set of documents collected as the -result of query. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`ctx['op']` (`String`):: - The name of the operation. - -{ref}/mapping-routing-field.html[`ctx['_routing']`] (`String`, read-only):: - The value used to select a shard for document storage. - -{ref}/mapping-index-field.html[`ctx['_index']`] (`String`, read-only):: - The name of the index. - -{ref}/mapping-id-field.html[`ctx['_id']`] (`String`, read-only):: - The unique document id. - -`ctx['_version']` (`int`, read-only):: - The current version of the document. - -{ref}/mapping-source-field.html[`ctx['_source']`] (`Map`):: - Contains extracted JSON in a `Map` and `List` structure for the fields - existing in a stored document. - -*Side Effects* - -`ctx['op']`:: - Use the default of `index` to update a document. Set to `none` to - specify no operation or `delete` to delete the current document from - the index. - -{ref}/mapping-source-field.html[`ctx['_source']`]:: - Modify the values in the `Map/List` structure to add, modify, or delete - the fields of a document. - -*Return* - -`void`:: - No expected return value. - -*API* - -The standard <> is available. - -*Example* - -To run this example, first follow the steps in -<>. 
- -The following query finds all seats in a specific section that have not been -sold and lowers the price by 2: - -[source,console] --------------------------------------------------- -POST /seats/_update_by_query -{ - "query": { - "bool": { - "filter": [ - { - "range": { - "row": { - "lte": 3 - } - } - }, - { - "match": { - "sold": false - } - } - ] - } - }, - "script": { - "source": "ctx._source.cost -= params.discount", - "lang": "painless", - "params": { - "discount": 2 - } - } -} --------------------------------------------------- -// TEST[setup:seats] diff --git a/docs/painless/painless-contexts/painless-update-context.asciidoc b/docs/painless/painless-contexts/painless-update-context.asciidoc deleted file mode 100644 index 53b1008cfebff..0000000000000 --- a/docs/painless/painless-contexts/painless-update-context.asciidoc +++ /dev/null @@ -1,75 +0,0 @@ -[[painless-update-context]] -=== Update context - -Use a Painless script in an {ref}/docs-update.html[update] operation to -add, modify, or delete fields within a single document. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`ctx['op']` (`String`):: - The name of the operation. - -{ref}/mapping-routing-field.html[`ctx['_routing']`] (`String`, read-only):: - The value used to select a shard for document storage. - -{ref}/mapping-index-field.html[`ctx['_index']`] (`String`, read-only):: - The name of the index. - -{ref}/mapping-id-field.html[`ctx['_id']`] (`String`, read-only):: - The unique document id. - -`ctx['_version']` (`int`, read-only):: - The current version of the document. - -`ctx['_now']` (`long`, read-only):: - The current timestamp in milliseconds. - -{ref}/mapping-source-field.html[`ctx['_source']`] (`Map`):: - Contains extracted JSON in a `Map` and `List` structure for the fields - existing in a stored document. - -*Side Effects* - -`ctx['op']`:: - Use the default of `index` to update a document. Set to `none` to - specify no operation or `delete` to delete the current document from - the index. - -{ref}/mapping-source-field.html[`ctx['_source']`]:: - Modify the values in the `Map/List` structure to add, modify, or delete - the fields of a document. - -*Return* - -`void`:: - No expected return value. - -*API* - -The standard <> is available. - -*Example* - -To run this example, first follow the steps in -<>. - -The following query updates a document to be sold, and sets the cost -to the actual price paid after discounts: - -[source,console] --------------------------------------------------- -POST /seats/_update/3 -{ - "script": { - "source": "ctx._source.sold = true; ctx._source.cost = params.sold_cost", - "lang": "painless", - "params": { - "sold_cost": 26 - } - } -} --------------------------------------------------- -// TEST[setup:seats] diff --git a/docs/painless/painless-contexts/painless-watcher-condition-context.asciidoc b/docs/painless/painless-contexts/painless-watcher-condition-context.asciidoc deleted file mode 100644 index 1aad9af7bb627..0000000000000 --- a/docs/painless/painless-contexts/painless-watcher-condition-context.asciidoc +++ /dev/null @@ -1,141 +0,0 @@ -[[painless-watcher-condition-context]] -=== Watcher condition context - -Use a Painless script as a {ref}/condition-script.html[watch condition] -that determines whether to execute a watch or a particular action within a watch. -Condition scripts return a Boolean value to indicate the status of the condition. 
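Before the full watch examples further down, a minimal, hypothetical condition of the kind this context expects. The `min_hits` parameter name is illustrative, and `ctx.payload` is populated by the watch input as described in the shared variables section that follows:

[source,Painless]
----
// Execute the watch only when the search input matched more than
// params.min_hits documents.
return ctx.payload.hits.total > params.min_hits
----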
- -include::painless-watcher-context-variables.asciidoc[] - -*Return* - -`boolean`:: - Expects `true` if the condition is met, and `false` if it is not. - -*API* - -The standard <> is available. - -*Example* - -To run the examples, first follow the steps in -<>. - -[source,console] ----- -POST _watcher/watch/_execute -{ - "watch" : { - "trigger" : { "schedule" : { "interval" : "24h" } }, - "input" : { - "search" : { - "request" : { - "indices" : [ "seats" ], - "body" : { - "query" : { - "term": { "sold": "true"} - }, - "aggs" : { - "theatres" : { - "terms" : { "field" : "play" }, - "aggs" : { - "money" : { - "sum": { "field" : "cost" } - } - } - } - } - } - } - } - }, - "condition" : { - "script" : - """ - return ctx.payload.aggregations.theatres.buckets.stream() <1> - .filter(theatre -> theatre.money.value < 15000 || - theatre.money.value > 50000) <2> - .count() > 0 <3> - """ - }, - "actions" : { - "my_log" : { - "logging" : { - "text" : "The output of the search was : {{ctx.payload.aggregations.theatres.buckets}}" - } - } - } - } -} ----- -// TEST[setup:seats] - -<1> The Java Stream API is used in the condition. This API allows manipulation of -the elements of the list in a pipeline. -<2> The stream filter removes items that do not meet the filter criteria. -<3> If there is at least one item in the list, the condition evaluates to true and the watch is executed. - -The following action condition script controls execution of the my_log action based -on the value of the seats sold for the plays in the data set. The script aggregates -the total sold seats for each play and returns true if there is at least one play -that has sold over $10,000. - -[source,console] ----- -POST _watcher/watch/_execute -{ - "watch" : { - "trigger" : { "schedule" : { "interval" : "24h" } }, - "input" : { - "search" : { - "request" : { - "indices" : [ "seats" ], - "body" : { - "query" : { - "term": { "sold": "true"} - }, - "size": 0, - "aggs" : { - "theatres" : { - "terms" : { "field" : "play" }, - "aggs" : { - "money" : { - "sum": { - "field" : "cost", - "script": { - "source": "doc.cost.value * doc.number.value" - } - } - } - } - } - } - } - } - } - }, - "actions" : { - "my_log" : { - "condition": { <1> - "script" : - """ - return ctx.payload.aggregations.theatres.buckets.stream() - .anyMatch(theatre -> theatre.money.value > 10000) <2> - """ - }, - "logging" : { - "text" : "At least one play has grossed over $10,000: {{ctx.payload.aggregations.theatres.buckets}}" - } - } - } - } -} ----- -// TEST[setup:seats] - -This example uses a nearly identical condition as the previous example. The -differences below are subtle and are worth calling out. - -<1> The location of the condition is no longer at the top level, but is within -an individual action. -<2> Instead of a filter, `anyMatch` is used to return a boolean value diff --git a/docs/painless/painless-contexts/painless-watcher-context-variables.asciidoc b/docs/painless/painless-contexts/painless-watcher-context-variables.asciidoc deleted file mode 100644 index c1ef342100143..0000000000000 --- a/docs/painless/painless-contexts/painless-watcher-context-variables.asciidoc +++ /dev/null @@ -1,32 +0,0 @@ -The following variables are available in all watcher contexts. - -*Variables* - -`params` (`Map`, read-only):: - User-defined parameters passed in as part of the query. - -`ctx['watch_id']` (`String`, read-only):: - The id of the watch. - -`ctx['id']` (`String`, read-only):: - The server generated unique identifier for the run watch. 
- -`ctx['metadata']` (`Map`, read-only):: - Metadata can be added to the top level of the watch definition. This - is user defined and is typically used to consolidate duplicate values - in a watch. - -`ctx['execution_time']` (`ZonedDateTime`, read-only):: - The time the watch began execution. - -`ctx['trigger']['scheduled_time']` (`ZonedDateTime`, read-only):: - The scheduled trigger time for the watch. This is the time the - watch should be executed. - -`ctx['trigger']['triggered_time']` (`ZonedDateTime`, read-only):: - The actual trigger time for the watch. This is the time the - watch was triggered for execution. - -`ctx['payload']` (`Map`, read-only):: - The accessible watch data based upon the - {ref}/input.html[watch input]. diff --git a/docs/painless/painless-contexts/painless-watcher-transform-context.asciidoc b/docs/painless/painless-contexts/painless-watcher-transform-context.asciidoc deleted file mode 100644 index 991bd03f3aa3d..0000000000000 --- a/docs/painless/painless-contexts/painless-watcher-transform-context.asciidoc +++ /dev/null @@ -1,323 +0,0 @@ -[[painless-watcher-transform-context]] -=== Watcher transform context - -Use a Painless script as a {ref}/transform-script.html[watch transform] -to transform a payload into a new payload for further use in the watch. -Transform scripts return an Object value of the new payload. - -include::painless-watcher-context-variables.asciidoc[] - -*Return* - -`Object`:: - The new payload. - -*API* - -The standard <> is available. - -*Example* - -To run the examples, first follow the steps in -<>. - -[source,console] ----- -POST _watcher/watch/_execute -{ - "watch" : { - "trigger" : { "schedule" : { "interval" : "24h" } }, - "input" : { - "search" : { - "request" : { - "indices" : [ "seats" ], - "body" : { - "query" : { "term": { "sold": "true"} }, - "aggs" : { - "theatres" : { - "terms" : { "field" : "play" }, - "aggs" : { - "money" : { - "sum": { "field" : "cost" } - } - } - } - } - } - } - } - }, - "transform" : { - "script": - """ - return [ - 'money_makers': ctx.payload.aggregations.theatres.buckets.stream() <1> - .filter(t -> { <2> - return t.money.value > 50000 - }) - .map(t -> { <3> - return ['play': t.key, 'total_value': t.money.value ] - }).collect(Collectors.toList()), <4> - 'duds' : ctx.payload.aggregations.theatres.buckets.stream() <5> - .filter(t -> { - return t.money.value < 15000 - }) - .map(t -> { - return ['play': t.key, 'total_value': t.money.value ] - }).collect(Collectors.toList()) - ] - """ - }, - "actions" : { - "my_log" : { - "logging" : { - "text" : "The output of the payload was transformed to {{ctx.payload}}" - } - } - } - } -} ----- -// TEST[setup:seats] - -<1> The Java Stream API is used in the transform. This API allows manipulation of -the elements of the list in a pipeline. -<2> The stream filter removes items that do not meet the filter criteria. -<3> The stream map transforms each element into a new object. -<4> The collector reduces the stream to a `java.util.List`. -<5> This is done again for the second set of values in the transform. - -The following action transform changes each value in the mod_log action into a `String`. -This transform does not change the values in the unmod_log action. 
- -[source,console] ----- -POST _watcher/watch/_execute -{ - "watch" : { - "trigger" : { "schedule" : { "interval" : "24h" } }, - "input" : { - "search" : { - "request" : { - "indices" : [ "seats" ], - "body" : { - "query" : { - "term": { "sold": "true"} - }, - "aggs" : { - "theatres" : { - "terms" : { "field" : "play" }, - "aggs" : { - "money" : { - "sum": { "field" : "cost" } - } - } - } - } - } - } - } - }, - "actions" : { - "mod_log" : { - "transform": { <1> - "script" : - """ - def formatter = NumberFormat.getCurrencyInstance(); - return [ - 'msg': ctx.payload.aggregations.theatres.buckets.stream() - .map(t-> formatter.format(t.money.value) + ' for the play ' + t.key) - .collect(Collectors.joining(", ")) - ] - """ - }, - "logging" : { - "text" : "The output of the payload was transformed to: {{ctx.payload.msg}}" - } - }, - "unmod_log" : { <2> - "logging" : { - "text" : "The output of the payload was not transformed and this value should not exist: {{ctx.payload.msg}}" - } - } - } - } -} ----- -// TEST[setup:seats] - -This example uses the streaming API in a very similar manner. The differences below are -subtle and worth calling out. - -<1> The location of the transform is no longer at the top level, but is within -an individual action. -<2> A second action that does not transform the payload is given for reference. - -The following example shows scripted watch and action transforms within the -context of a complete watch. This watch also uses a scripted -<>. - -[source,console] ----- -POST _watcher/watch/_execute -{ - "watch" : { - "metadata" : { "high_threshold": 4000, "low_threshold": 1000 }, - "trigger" : { "schedule" : { "interval" : "24h" } }, - "input" : { - "search" : { - "request" : { - "indices" : [ "seats" ], - "body" : { - "query" : { - "term": { "sold": "true"} - }, - "aggs" : { - "theatres" : { - "terms" : { "field" : "play" }, - "aggs" : { - "money" : { - "sum": { - "field" : "cost", - "script": { - "source": "doc.cost.value * doc.number.value" - } - } - } - } - } - } - } - } - } - }, - "condition" : { - "script" : - """ - return ctx.payload.aggregations.theatres.buckets.stream() - .anyMatch(theatre -> theatre.money.value < ctx.metadata.low_threshold || - theatre.money.value > ctx.metadata.high_threshold) - """ - }, - "transform" : { - "script": - """ - return [ - 'money_makers': ctx.payload.aggregations.theatres.buckets.stream() - .filter(t -> { - return t.money.value > ctx.metadata.high_threshold - }) - .map(t -> { - return ['play': t.key, 'total_value': t.money.value ] - }).collect(Collectors.toList()), - 'duds' : ctx.payload.aggregations.theatres.buckets.stream() - .filter(t -> { - return t.money.value < ctx.metadata.low_threshold - }) - .map(t -> { - return ['play': t.key, 'total_value': t.money.value ] - }).collect(Collectors.toList()) - ] - """ - }, - "actions" : { - "log_money_makers" : { - "condition": { - "script" : "return ctx.payload.money_makers.size() > 0" - }, - "transform": { - "script" : - """ - def formatter = NumberFormat.getCurrencyInstance(); - return [ - 'plays_value': ctx.payload.money_makers.stream() - .map(t-> formatter.format(t.total_value) + ' for the play ' + t.play) - .collect(Collectors.joining(", ")) - ] - """ - }, - "logging" : { - "text" : "The following plays contain the highest grossing total income: {{ctx.payload.plays_value}}" - } - }, - "log_duds" : { - "condition": { - "script" : "return ctx.payload.duds.size() > 0" - }, - "transform": { - "script" : - """ - def formatter = NumberFormat.getCurrencyInstance(); - return [ - 
'plays_value': ctx.payload.duds.stream() - .map(t-> formatter.format(t.total_value) + ' for the play ' + t.play) - .collect(Collectors.joining(", ")) - ] - """ - }, - "logging" : { - "text" : "The following plays need more advertising due to their low total income: {{ctx.payload.plays_value}}" - } - } - } - } -} ----- -// TEST[setup:seats] - -The following example shows the use of metadata and transforming dates into a readable format. - -[source,console] ----- -POST _watcher/watch/_execute -{ - "watch" : { - "metadata" : { "min_hits": 10 }, - "trigger" : { "schedule" : { "interval" : "24h" } }, - "input" : { - "search" : { - "request" : { - "indices" : [ "seats" ], - "body" : { - "query" : { - "term": { "sold": "true"} - }, - "aggs" : { - "theatres" : { - "terms" : { "field" : "play" }, - "aggs" : { - "money" : { - "sum": { "field" : "cost" } - } - } - } - } - } - } - } - }, - "condition" : { - "script" : - """ - return ctx.payload.hits.total > ctx.metadata.min_hits - """ - }, - "transform" : { - "script" : - """ - def theDate = ZonedDateTime.ofInstant(ctx.execution_time.toInstant(), ctx.execution_time.getZone()); - return ['human_date': DateTimeFormatter.RFC_1123_DATE_TIME.format(theDate), - 'aggregations': ctx.payload.aggregations] - """ - }, - "actions" : { - "my_log" : { - "logging" : { - "text" : "The watch was successfully executed on {{ctx.payload.human_date}} and contained {{ctx.payload.aggregations.theatres.buckets.size}} buckets" - } - } - } - } -} ----- -// TEST[setup:seats] diff --git a/docs/painless/painless-contexts/painless-weight-context.asciidoc b/docs/painless/painless-contexts/painless-weight-context.asciidoc deleted file mode 100644 index 47b9df0e7cb66..0000000000000 --- a/docs/painless/painless-contexts/painless-weight-context.asciidoc +++ /dev/null @@ -1,42 +0,0 @@ -[[painless-weight-context]] -=== Weight context - -Use a Painless script to create a -{ref}/index-modules-similarity.html[weight] for use in a -<>. The weight makes up the -part of the similarity calculation that is independent of the document being -scored, and so can be built up front and cached. - -Queries that contain multiple terms calculate a separate weight for each term. - -*Variables* - -`query.boost` (`float`, read-only):: - The boost value if provided by the query. If this is not provided the - value is `1.0f`. - -`field.docCount` (`long`, read-only):: - The number of documents that have a value for the current field. - -`field.sumDocFreq` (`long`, read-only):: - The sum of all terms that exist for the current field. If this is not - available the value is `-1`. - -`field.sumTotalTermFreq` (`long`, read-only):: - The sum of occurrences in the index for all the terms that exist in the - current field. If this is not available the value is `-1`. - -`term.docFreq` (`long`, read-only):: - The number of documents that contain the current term in the index. - -`term.totalTermFreq` (`long`, read-only):: - The total occurrences of the current term in the index. - -*Return* - -`double`:: - A scoring factor used across all documents. - -*API* - -The standard <> is available. diff --git a/docs/painless/painless-guide.asciidoc b/docs/painless/painless-guide.asciidoc deleted file mode 100644 index 2d79445915c7d..0000000000000 --- a/docs/painless/painless-guide.asciidoc +++ /dev/null @@ -1,29 +0,0 @@ -[[painless-guide]] -== Painless Guide - -_Painless_ is a simple, secure scripting language designed specifically for use -with Elasticsearch. 
It is the default scripting language for Elasticsearch and -can safely be used for inline and stored scripts. For a jump start into -Painless, see <>. For a -detailed description of the Painless syntax and language features, see the -<>. - -You can use Painless anywhere scripts are used in Elasticsearch. Painless -provides: - -* Fast performance: Painless scripts https://benchmarks.elastic.co/index.html#search_qps_scripts[ -run several times faster] than the alternatives. - -* Safety: Fine-grained allowlist with method call/field granularity. See the -{painless}/painless-api-reference.html[Painless API Reference] for a -complete list of available classes and methods. - -* Optional typing: Variables and parameters can use explicit types or the -dynamic `def` type. - -* Syntax: Extends a subset of Java's syntax to provide additional scripting -language features. - -* Optimizations: Designed specifically for Elasticsearch scripting. - -include::painless-guide/index.asciidoc[] \ No newline at end of file diff --git a/docs/painless/painless-guide/index.asciidoc b/docs/painless/painless-guide/index.asciidoc deleted file mode 100644 index 74db0897e1520..0000000000000 --- a/docs/painless/painless-guide/index.asciidoc +++ /dev/null @@ -1,14 +0,0 @@ -include::painless-walkthrough.asciidoc[] - -include::painless-runtime-fields.asciidoc[] - -include::painless-datetime.asciidoc[] - -include::painless-method-dispatch.asciidoc[] - -include::painless-debugging.asciidoc[] - -include::painless-execute-script.asciidoc[] - -include::painless-ingest.asciidoc[] - diff --git a/docs/painless/painless-guide/painless-datetime.asciidoc b/docs/painless/painless-guide/painless-datetime.asciidoc deleted file mode 100644 index 685df9f66f21f..0000000000000 --- a/docs/painless/painless-guide/painless-datetime.asciidoc +++ /dev/null @@ -1,898 +0,0 @@ -[[painless-datetime]] -=== Using Datetime in Painless - -==== Datetime API - -Datetimes in Painless use the standard Java libraries and are available through -the Painless <>. Most of the classes -from the following Java packages are available to use in Painless scripts: - -* <> -* <> -* <> -* <> -* <> - -==== Datetime Representation - -Datetimes in Painless are most commonly represented as a numeric value, a -string value, or a complex value. - -numeric:: a datetime representation as a number from a starting offset called -an epoch; in Painless this is typically a <> as -milliseconds since an epoch of 1970-01-01 00:00:00 Zulu Time -string:: a datetime representation as a sequence of characters defined by -a standard format or a custom format; in Painless this is typically a -<> of the standard format -{wikipedia}/ISO_8601[ISO 8601] -complex:: a datetime representation as a complex type -(<>) that abstracts away internal details of how the -datetime is stored and often provides utilities for modification and -comparison; in Painless this is typically a -<> - -Switching between different representations of datetimes is often necessary to -achieve a script's objective(s). A typical pattern in a script is to switch a -numeric or string datetime to a complex datetime, modify or compare the complex -datetime, and then switch it back to a numeric or string datetime for storage -or to return a result. - -==== Datetime Parsing and Formatting - -Datetime parsing is a switch from a string datetime to a complex datetime, and -datetime formatting is a switch from a complex datetime to a string datetime. 
- -A <> is a -complex type (<>) that defines the allowed sequence -of characters for a string datetime. Datetime parsing and formatting often -require a DateTimeFormatter. For more information about how to use a -DateTimeFormatter see the -{java11-javadoc}/java.base/java/time/format/DateTimeFormatter.html[Java documentation]. - -===== Datetime Parsing Examples - -* parse from milliseconds -+ -[source,Painless] ----- -String milliSinceEpochString = "434931330000"; -long milliSinceEpoch = Long.parseLong(milliSinceEpochString); -Instant instant = Instant.ofEpochMilli(milliSinceEpoch); -ZonedDateTime zdt = ZonedDateTime.ofInstant(instant, ZoneId.of('Z')); ----- -+ -* parse from ISO 8601 -+ -[source,Painless] ----- -String datetime = '1983-10-13T22:15:30Z'; -ZonedDateTime zdt = ZonedDateTime.parse(datetime); <1> ----- -<1> Note the parse method uses ISO 8601 by default. -+ -* parse from RFC 1123 -+ -[source,Painless] ----- -String datetime = 'Thu, 13 Oct 1983 22:15:30 GMT'; -ZonedDateTime zdt = ZonedDateTime.parse(datetime, - DateTimeFormatter.RFC_1123_DATE_TIME); <1> ----- -<1> Note the use of a built-in DateTimeFormatter. -+ -* parse from a custom format -+ -[source,Painless] ----- -String datetime = 'custom y 1983 m 10 d 13 22:15:30 Z'; -DateTimeFormatter dtf = DateTimeFormatter.ofPattern( - "'custom' 'y' yyyy 'm' MM 'd' dd HH:mm:ss VV"); -ZonedDateTime zdt = ZonedDateTime.parse(datetime, dtf); <1> ----- -<1> Note the use of a custom DateTimeFormatter. - -===== Datetime Formatting Examples - -* format to ISO 8601 -+ -[source,Painless] ----- -ZonedDateTime zdt = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 0, ZoneId.of('Z')); -String datetime = zdt.format(DateTimeFormatter.ISO_INSTANT); <1> ----- -<1> Note the use of a built-in DateTimeFormatter. -+ -* format to a custom format -+ -[source,Painless] ----- -ZonedDateTime zdt = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 0, ZoneId.of('Z')); -DateTimeFormatter dtf = DateTimeFormatter.ofPattern( - "'date:' yyyy/MM/dd 'time:' HH:mm:ss"); -String datetime = zdt.format(dtf); <1> ----- -<1> Note the use of a custom DateTimeFormatter. - -==== Datetime Conversion - -Datetime conversion is a switch from a numeric datetime to a complex datetime -and vice versa. - -===== Datetime Conversion Examples - -* convert from milliseconds -+ -[source,Painless] ----- -long milliSinceEpoch = 434931330000L; -Instant instant = Instant.ofEpochMilli(milliSinceEpoch); -ZonedDateTime zdt = ZonedDateTime.ofInstant(instant, ZoneId.of('Z')); ----- -+ -* convert to milliseconds -+ -[source,Painless] ------ -ZonedDateTime zdt = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 0, ZoneId.of('Z')); -long milliSinceEpoch = zdt.toInstant().toEpochMilli(); ------ - -==== Datetime Pieces - -Datetime representations often contain the data to extract individual datetime -pieces such as year, hour, timezone, etc. Use individual pieces of a datetime -to create a complex datetime, and use a complex datetime to extract individual -pieces. 
- -===== Datetime Pieces Examples - -* create a complex datetime from pieces -+ -[source,Painless] ----- -int year = 1983; -int month = 10; -int day = 13; -int hour = 22; -int minutes = 15; -int seconds = 30; -int nanos = 0; -ZonedDateTime zdt = ZonedDateTime.of( - year, month, day, hour, minutes, seconds, nanos, ZoneId.of('Z')); ----- -+ -* extract pieces from a complex datetime -+ -[source,Painless] ----- -ZonedDateTime zdt = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 100, ZoneId.of(tz)); -int year = zdt.getYear(); -int month = zdt.getMonthValue(); -int day = zdt.getDayOfMonth(); -int hour = zdt.getHour(); -int minutes = zdt.getMinute(); -int seconds = zdt.getSecond(); -int nanos = zdt.getNano(); ----- - -==== Datetime Modification - -Use either a numeric datetime or a complex datetime to do modification such as -adding several seconds to a datetime or subtracting several days from a -datetime. Use standard <> to -modify a numeric datetime. Use -<> (or fields) to modify -a complex datetime. Note many complex datetimes are immutable so upon -modification a new complex datetime is created that requires -<> or immediate use. - -===== Datetime Modification Examples - -* Subtract three seconds from a numeric datetime in milliseconds -+ -[source,Painless] ----- -long milliSinceEpoch = 434931330000L; -milliSinceEpoch = milliSinceEpoch - 1000L*3L; ----- -+ -* Add three days to a complex datetime -+ -[source,Painless] ----- -ZonedDateTime zdt = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 0, ZoneId.of('Z')); -ZonedDateTime updatedZdt = zdt.plusDays(3); ----- -+ -* Subtract 125 minutes from a complex datetime -+ -[source,Painless] ----- -ZonedDateTime zdt = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 0, ZoneId.of('Z')); -ZonedDateTime updatedZdt = zdt.minusMinutes(125); ----- -+ -* Set the year on a complex datetime -+ -[source,Painless] ----- -ZonedDateTime zdt = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 0, ZoneId.of('Z')); -ZonedDateTime updatedZdt = zdt.withYear(1976); ----- - -==== Datetime Difference (Elapsed Time) - -Use either two numeric datetimes or two complex datetimes to calculate the -difference (elapsed time) between two different datetimes. Use -<> to calculate the difference between two -numeric datetimes of the same time unit such as milliseconds. For -complex datetimes there is often a method or another complex type -(<>) available to calculate the difference. Use -<> -to calculate the difference between two complex datetimes if supported. 
- -===== Datetime Difference Examples - -* Difference in milliseconds between two numeric datetimes -+ -[source,Painless] ----- -long startTimestamp = 434931327000L; -long endTimestamp = 434931330000L; -long differenceInMillis = endTimestamp - startTimestamp; ----- -+ -* Difference in milliseconds between two complex datetimes -+ -[source,Painless] ----- -ZonedDateTime zdt1 = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 11000000, ZoneId.of('Z')); -ZonedDateTime zdt2 = - ZonedDateTime.of(1983, 10, 13, 22, 15, 35, 0, ZoneId.of('Z')); -long differenceInMillis = ChronoUnit.MILLIS.between(zdt1, zdt2); ----- -+ -* Difference in days between two complex datetimes -+ -[source,Painless] ----- -ZonedDateTime zdt1 = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 11000000, ZoneId.of('Z')); -ZonedDateTime zdt2 = - ZonedDateTime.of(1983, 10, 17, 22, 15, 35, 0, ZoneId.of('Z')); -long differenceInDays = ChronoUnit.DAYS.between(zdt1, zdt2); ----- - -==== Datetime Comparison - -Use either two numeric datetimes or two complex datetimes to do a datetime -comparison. Use standard <> -to compare two numeric datetimes of the same time unit such as milliseconds. -For complex datetimes there is often a method or another complex type -(<>) available to do the comparison. - -===== Datetime Comparison Examples - -* Greater than comparison of two numeric datetimes in milliseconds -+ -[source,Painless] ----- -long timestamp1 = 434931327000L; -long timestamp2 = 434931330000L; - -if (timestamp1 > timestamp2) { - // handle condition -} ----- -+ -* Equality comparison of two complex datetimes -+ -[source,Painless] ----- -ZonedDateTime zdt1 = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 0, ZoneId.of('Z')); -ZonedDateTime zdt2 = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 0, ZoneId.of('Z')); - -if (zdt1.equals(zdt2)) { - // handle condition -} ----- -+ -* Less than comparison of two complex datetimes -+ -[source,Painless] ----- -ZonedDateTime zdt1 = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 0, ZoneId.of('Z')); -ZonedDateTime zdt2 = - ZonedDateTime.of(1983, 10, 17, 22, 15, 35, 0, ZoneId.of('Z')); - -if (zdt1.isBefore(zdt2)) { - // handle condition -} ----- -+ -* Greater than comparison of two complex datetimes -+ -[source,Painless] ----- -ZonedDateTime zdt1 = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 0, ZoneId.of('Z')); -ZonedDateTime zdt2 = - ZonedDateTime.of(1983, 10, 17, 22, 15, 35, 0, ZoneId.of('Z')); - -if (zdt1.isAfter(zdt2)) { - // handle condition -} ----- - -==== Datetime Zone - -Both string datetimes and complex datetimes have a timezone with a default of -`UTC`. Numeric datetimes do not have enough explicit information to -have a timezone, so `UTC` is always assumed. Use -<> (or fields) in -conjunction with a <> to change -the timezone for a complex datetime. Parse a string datetime into a complex -datetime to change the timezone, and then format the complex datetime back into -a desired string datetime. Note many complex datetimes are immutable so upon -modification a new complex datetime is created that requires -<> or immediate use. 
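-
-As a small sketch, a numeric datetime, which is always assumed to be `UTC`,
-can be converted into a complex datetime in another timezone (the examples
-below start from complex and string datetimes):
-
-[source,Painless]
-----
-long milliSinceEpoch = 434931330000L;
-Instant instant = Instant.ofEpochMilli(milliSinceEpoch);
-ZonedDateTime pst =
-    ZonedDateTime.ofInstant(instant, ZoneId.of('America/Los_Angeles'));
-----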
- -===== Datetime Zone Examples - -* Modify the timezone for a complex datetime -+ -[source,Painless] ----- -ZonedDateTime utc = - ZonedDateTime.of(1983, 10, 13, 22, 15, 30, 0, ZoneId.of('Z')); -ZonedDateTime pst = utc.withZoneSameInstant(ZoneId.of('America/Los_Angeles')); ----- -+ -* Modify the timezone for a string datetime -+ -[source,Painless] ----- -String gmtString = 'Thu, 13 Oct 1983 22:15:30 GMT'; -ZonedDateTime gmtZdt = ZonedDateTime.parse(gmtString, - DateTimeFormatter.RFC_1123_DATE_TIME); <1> -ZonedDateTime pstZdt = - gmtZdt.withZoneSameInstant(ZoneId.of('America/Los_Angeles')); -String pstString = pstZdt.format(DateTimeFormatter.RFC_1123_DATE_TIME); ----- -<1> Note the use of a built-in DateTimeFormatter. - -==== Datetime Input - -There are several common ways datetimes are used as input for a script -determined by the <>. Typically, datetime -input will be accessed from parameters specified by the user, from an original -source document, or from an indexed document. - -===== Datetime Input From User Parameters - -Use the {ref}/modules-scripting-using.html[params section] -during script specification to pass in a numeric datetime or string datetime as -a script input. Access to user-defined parameters within a script is dependent -on the Painless context, though, the parameters are most commonly accessible -through an input called `params`. - -*Examples* - -* Parse a numeric datetime from user parameters to a complex datetime -+ -** Input: -+ -[source,JSON] ----- -... -"script": { - ... - "params": { - "input_datetime": 434931327000 - } -} -... ----- -+ -** Script: -+ -[source,Painless] ----- -long inputDateTime = params['input_datetime']; -Instant instant = Instant.ofEpochMilli(inputDateTime); -ZonedDateTime zdt = ZonedDateTime.ofInstant(instant, ZoneId.of('Z')); ----- -+ -* Parse a string datetime from user parameters to a complex datetime -+ -** Input: -+ -[source,JSON] ----- -... -"script": { - ... - "params": { - "input_datetime": "custom y 1983 m 10 d 13 22:15:30 Z" - } -} -... ----- -+ -** Script: -+ -[source,Painless] ----- -String datetime = params['input_datetime']; -DateTimeFormatter dtf = DateTimeFormatter.ofPattern( - "'custom' 'y' yyyy 'm' MM 'd' dd HH:mm:ss VV"); -ZonedDateTime zdt = ZonedDateTime.parse(datetime, dtf); <1> ----- -<1> Note the use of a custom DateTimeFormatter. - -===== Datetime Input From a Source Document - -Use an original {ref}/mapping-source-field.html[source] document as a script -input to access a numeric datetime or string datetime for a specific field -within that document. Access to an original source document within a script is -dependent on the Painless context and is not always available. An original -source document is most commonly accessible through an input called -`ctx['_source']` or `params['_source']`. - -*Examples* - -* Parse a numeric datetime from a sourced document to a complex datetime -+ -** Input: -+ -[source,JSON] ----- -{ - ... - "input_datetime": 434931327000 - ... -} ----- -+ -** Script: -+ -[source,Painless] ----- -long inputDateTime = ctx['_source']['input_datetime']; <1> -Instant instant = Instant.ofEpochMilli(inputDateTime); -ZonedDateTime zdt = ZonedDateTime.ofInstant(instant, ZoneId.of('Z')); ----- -<1> Note access to `_source` is dependent on the Painless context. -+ -* Parse a string datetime from a sourced document to a complex datetime -+ -** Input: -+ -[source,JSON] ----- -{ - ... - "input_datetime": "1983-10-13T22:15:30Z" - ... 
-} ----- -+ -** Script: -+ -[source,Painless] ----- -String datetime = params['_source']['input_datetime']; <1> -ZonedDateTime zdt = ZonedDateTime.parse(datetime); <2> ----- -<1> Note access to `_source` is dependent on the Painless context. -<2> Note the parse method uses ISO 8601 by default. - -===== Datetime Input From an Indexed Document - -Use an indexed document as a script input to access a complex datetime for a -specific field within that document where the field is mapped as a -{ref}/date.html[standard date] or a {ref}/date_nanos.html[nanosecond date]. -Numeric datetime fields mapped as {ref}/number.html[numeric] and string -datetime fields mapped as {ref}/keyword.html[keyword] are accessible through an -indexed document as well. Access to an indexed document within a script is -dependent on the Painless context and is not always available. An indexed -document is most commonly accessible through an input called `doc`. - -*Examples* - -* Format a complex datetime from an indexed document to a string datetime -+ -** Assumptions: -+ -*** The field `input_datetime` exists in all indexes as part of the query -*** All indexed documents contain the field `input_datetime` -+ -** Mappings: -+ -[source,JSON] ----- -{ - "mappings": { - ... - "properties": { - ... - "input_datetime": { - "type": "date" - } - ... - } - ... - } -} ----- -+ -** Script: -+ -[source,Painless] ----- -ZonedDateTime input = doc['input_datetime'].value; -String output = input.format(DateTimeFormatter.ISO_INSTANT); <1> ----- -<1> Note the use of a built-in DateTimeFormatter. -+ -* Find the difference between two complex datetimes from an indexed document -+ -** Assumptions: -+ -*** The fields `start` and `end` may *not* exist in all indexes as part of the -query -*** The fields `start` and `end` may *not* have values in all indexed documents -+ -** Mappings: -+ -[source,JSON] ----- -{ - "mappings": { - ... - "properties": { - ... - "start": { - "type": "date" - }, - "end": { - "type": "date" - } - ... - } - ... - } -} ----- -+ -** Script: -+ -[source,Painless] ----- -if (doc.containsKey('start') && doc.containsKey('end')) { <1> - - if (doc['start'].size() > 0 && doc['end'].size() > 0) { <2> - - ZonedDateTime start = doc['start'].value; - ZonedDateTime end = doc['end'].value; - long differenceInMillis = ChronoUnit.MILLIS.between(start, end); - - // handle difference in times - } else { - // handle fields without values - } -} else { - // handle index with missing fields -} ----- -<1> When a query's results span multiple indexes, some indexes may not -contain a specific field. Use the `containsKey` method call on the `doc` input -to ensure a field exists as part of the index for the current document. -<2> Some fields within a document may have no values. Use the `size` method -call on a field within the `doc` input to ensure that field has at least one -value for the current document. - -==== Datetime Now - -Under most Painless contexts the current datetime, `now`, is not supported. -There are two primary reasons for this. The first is that scripts are often run once -per document, so each time the script is run a different `now` is returned. The -second is that scripts are often run in a distributed fashion without a way to -appropriately synchronize `now`. Instead, pass in a user-defined parameter with -either a string datetime or numeric datetime for `now`. A numeric datetime is -preferred as there is no need to parse it for comparison. 
-
-===== Datetime Now Examples
-
-* Use a numeric datetime as `now`
-+
-** Assumptions:
-+
-*** The field `input_datetime` exists in all indexes as part of the query
-*** All indexed documents contain the field `input_datetime`
-+
-** Mappings:
-+
-[source,JSON]
-----
-{
-  "mappings": {
-    ...
-    "properties": {
-      ...
-      "input_datetime": {
-        "type": "date"
-      }
-      ...
-    }
-    ...
-  }
-}
-----
-+
-** Input:
-+
-[source,JSON]
-----
-...
-"script": {
-  ...
-  "params": {
-    "now":
-  }
-}
-...
-----
-+
-** Script:
-+
-[source,Painless]
-----
-long now = params['now'];
-ZonedDateTime inputDateTime = doc['input_datetime'].value;
-long millisDateTime = inputDateTime.toInstant().toEpochMilli();
-long elapsedTime = now - millisDateTime;
-----
-+
-* Use a string datetime as `now`
-+
-** Assumptions:
-+
-*** The field `input_datetime` exists in all indexes as part of the query
-*** All indexed documents contain the field `input_datetime`
-+
-** Mappings:
-+
-[source,JSON]
-----
-{
-  "mappings": {
-    ...
-    "properties": {
-      ...
-      "input_datetime": {
-        "type": "date"
-      }
-      ...
-    }
-    ...
-  }
-}
-----
-+
-** Input:
-+
-[source,JSON]
-----
-...
-"script": {
-  ...
-  "params": {
-    "now": ""
-  }
-}
-...
-----
-+
-** Script:
-+
-[source,Painless]
-----
-String nowString = params['now'];
-ZonedDateTime nowZdt = ZonedDateTime.parse(nowString); <1>
-long now = nowZdt.toInstant().toEpochMilli();
-ZonedDateTime inputDateTime = doc['input_datetime'].value;
-long millisDateTime = inputDateTime.toInstant().toEpochMilli();
-long elapsedTime = now - millisDateTime;
-----
-<1> Note this parses the same string datetime every time the script runs. Use a
-numeric datetime to avoid a significant performance hit.
-
-==== Datetime Examples in Contexts
-
-===== Load the Example Data
-
-Run the following curl commands to load the data necessary for the context
-examples into an Elasticsearch cluster:
-
-. Create {ref}/mapping.html[mappings] for the sample data.
-+
-[source,console]
-----
-PUT /messages
-{
-  "mappings": {
-    "properties": {
-      "priority": {
-        "type": "integer"
-      },
-      "datetime": {
-        "type": "date"
-      },
-      "message": {
-        "type": "text"
-      }
-    }
-  }
-}
-----
-+
-. Load the sample data.
-+ -[source,console] ----- -POST /_bulk -{ "index" : { "_index" : "messages", "_id" : "1" } } -{ "priority": 1, "datetime": "2019-07-17T12:13:14Z", "message": "m1" } -{ "index" : { "_index" : "messages", "_id" : "2" } } -{ "priority": 1, "datetime": "2019-07-24T01:14:59Z", "message": "m2" } -{ "index" : { "_index" : "messages", "_id" : "3" } } -{ "priority": 2, "datetime": "1983-10-14T00:36:42Z", "message": "m3" } -{ "index" : { "_index" : "messages", "_id" : "4" } } -{ "priority": 3, "datetime": "1983-10-10T02:15:15Z", "message": "m4" } -{ "index" : { "_index" : "messages", "_id" : "5" } } -{ "priority": 3, "datetime": "1983-10-10T17:18:19Z", "message": "m5" } -{ "index" : { "_index" : "messages", "_id" : "6" } } -{ "priority": 1, "datetime": "2019-08-03T17:19:31Z", "message": "m6" } -{ "index" : { "_index" : "messages", "_id" : "7" } } -{ "priority": 3, "datetime": "2019-08-04T17:20:00Z", "message": "m7" } -{ "index" : { "_index" : "messages", "_id" : "8" } } -{ "priority": 2, "datetime": "2019-08-04T18:01:01Z", "message": "m8" } -{ "index" : { "_index" : "messages", "_id" : "9" } } -{ "priority": 3, "datetime": "1983-10-10T19:00:45Z", "message": "m9" } -{ "index" : { "_index" : "messages", "_id" : "10" } } -{ "priority": 2, "datetime": "2019-07-23T23:39:54Z", "message": "m10" } ----- -// TEST[continued] - -===== Day-of-the-Week Bucket Aggregation Example - -The following example uses a -{ref}/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-script[terms aggregation] -as part of the -<> to -display the number of messages from each day-of-the-week. - -[source,console] ----- -GET /messages/_search?pretty=true -{ - "aggs": { - "day-of-week-count": { - "terms": { - "script": "return doc[\"datetime\"].value.getDayOfWeekEnum();" - } - } - } -} ----- -// TEST[continued] - -===== Morning/Evening Bucket Aggregation Example - -The following example uses a -{ref}/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-script[terms aggregation] -as part of the -<> to -display the number of messages received in the morning versus the evening. - -[source,console] ----- -GET /messages/_search?pretty=true -{ - "aggs": { - "am-pm-count": { - "terms": { - "script": "return doc[\"datetime\"].value.getHour() < 12 ? \"AM\" : \"PM\";" - } - } - } -} ----- -// TEST[continued] - -===== Age of a Message Script Field Example - -The following example uses a -{ref}/search-fields.html#script-fields[script field] as part of the -<> to display the elapsed time between -"now" and when a message was received. 
-
-[source,console]
-----
-GET /_search?pretty=true
-{
-  "query": {
-    "match_all": {}
-  },
-  "script_fields": {
-    "message_age": {
-      "script": {
-        "source": "ZonedDateTime now = ZonedDateTime.ofInstant(Instant.ofEpochMilli(params[\"now\"]), ZoneId.of(\"Z\")); ZonedDateTime mdt = doc[\"datetime\"].value; String age; long years = mdt.until(now, ChronoUnit.YEARS); age = years + \"Y \"; mdt = mdt.plusYears(years); long months = mdt.until(now, ChronoUnit.MONTHS); age += months + \"M \"; mdt = mdt.plusMonths(months); long days = mdt.until(now, ChronoUnit.DAYS); age += days + \"D \"; mdt = mdt.plusDays(days); long hours = mdt.until(now, ChronoUnit.HOURS); age += hours + \"h \"; mdt = mdt.plusHours(hours); long minutes = mdt.until(now, ChronoUnit.MINUTES); age += minutes + \"m \"; mdt = mdt.plusMinutes(minutes); long seconds = mdt.until(now, ChronoUnit.SECONDS); age += seconds + \"s\"; return age;",
-        "params": {
-          "now": 1574005645830
-        }
-      }
-    }
-  }
-}
-----
-// TEST[continued]
-
-The following shows the script broken into multiple lines:
-
-[source,Painless]
-----
-ZonedDateTime now = ZonedDateTime.ofInstant(
-    Instant.ofEpochMilli(params['now']), ZoneId.of('Z')); <1>
-ZonedDateTime mdt = doc['datetime'].value; <2>
-
-String age;
-
-long years = mdt.until(now, ChronoUnit.YEARS); <3>
-age = years + 'Y '; <4>
-mdt = mdt.plusYears(years); <5>
-
-long months = mdt.until(now, ChronoUnit.MONTHS);
-age += months + 'M ';
-mdt = mdt.plusMonths(months);
-
-long days = mdt.until(now, ChronoUnit.DAYS);
-age += days + 'D ';
-mdt = mdt.plusDays(days);
-
-long hours = mdt.until(now, ChronoUnit.HOURS);
-age += hours + 'h ';
-mdt = mdt.plusHours(hours);
-
-long minutes = mdt.until(now, ChronoUnit.MINUTES);
-age += minutes + 'm ';
-mdt = mdt.plusMinutes(minutes);
-
-long seconds = mdt.until(now, ChronoUnit.SECONDS);
-age += seconds + 's';
-
-return age; <6>
-----
-<1> Parse the datetime "now" as input from the user-defined params.
-<2> Store the datetime the message was received as a `ZonedDateTime`.
-<3> Find the difference in years between "now" and the datetime the message was
-received.
-<4> Add the difference in years later returned in the format
-`Y ...` for the age of a message.
-<5> Add the years so only the remainder of the months, days, etc. remain as the
-difference between "now" and the datetime the message was received. Repeat this
-pattern until the desired granularity is reached (seconds in this example).
-<6> Return the age of the message in the format
-`Y M D h m s `.
diff --git a/docs/painless/painless-guide/painless-debugging.asciidoc b/docs/painless/painless-guide/painless-debugging.asciidoc
deleted file mode 100644
index afd837059646e..0000000000000
--- a/docs/painless/painless-guide/painless-debugging.asciidoc
+++ /dev/null
@@ -1,92 +0,0 @@
-[[painless-debugging]]
-=== Painless Debugging
-
-==== Debug.Explain
-
-Painless doesn't have a
-{wikipedia}/Read%E2%80%93eval%E2%80%93print_loop[REPL]
-and while it'd be nice for it to have one day, it wouldn't tell you the
-whole story around debugging painless scripts embedded in Elasticsearch because
-the data that the scripts have access to or "context" is so important. For now
-the best way to debug embedded scripts is by throwing exceptions at choice
-places. While you can throw your own exceptions
-(`throw new Exception('whatever')`), Painless's sandbox prevents you from
-accessing useful information like the type of an object. So Painless has a
-utility method, `Debug.explain` which throws the exception for you.
For -example, you can use {ref}/search-explain.html[`_explain`] to explore the -context available to a {ref}/query-dsl-script-query.html[script query]. - -[source,console] ---------------------------------------------------------- -PUT /hockey/_doc/1?refresh -{"first":"johnny","last":"gaudreau","goals":[9,27,1],"assists":[17,46,0],"gp":[26,82,1]} - -POST /hockey/_explain/1 -{ - "query": { - "script": { - "script": "Debug.explain(doc.goals)" - } - } -} ---------------------------------------------------------- -// TEST[s/_explain\/1/_explain\/1?error_trace=false/ catch:/painless_explain_error/] -// The test system sends error_trace=true by default for easier debugging so -// we have to override it to get a normal shaped response - -Which shows that the class of `doc.first` is -`org.elasticsearch.index.fielddata.ScriptDocValues.Longs` by responding with: - -[source,console-result] ---------------------------------------------------------- -{ - "error": { - "type": "script_exception", - "to_string": "[1, 9, 27]", - "painless_class": "org.elasticsearch.index.fielddata.ScriptDocValues.Longs", - "java_class": "org.elasticsearch.index.fielddata.ScriptDocValues$Longs", - ... - }, - "status": 400 -} ---------------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"script_stack": $body.error.script_stack, "script": $body.error.script, "lang": $body.error.lang, "position": $body.error.position, "caused_by": $body.error.caused_by, "root_cause": $body.error.root_cause, "reason": $body.error.reason/] - -You can use the same trick to see that `_source` is a `LinkedHashMap` -in the `_update` API: - -[source,console] ---------------------------------------------------------- -POST /hockey/_update/1 -{ - "script": "Debug.explain(ctx._source)" -} ---------------------------------------------------------- -// TEST[continued s/_update\/1/_update\/1?error_trace=false/ catch:/painless_explain_error/] - -The response looks like: - -[source,console-result] ---------------------------------------------------------- -{ - "error" : { - "root_cause": ..., - "type": "illegal_argument_exception", - "reason": "failed to execute script", - "caused_by": { - "type": "script_exception", - "to_string": "{gp=[26, 82, 1], last=gaudreau, assists=[17, 46, 0], first=johnny, goals=[9, 27, 1]}", - "painless_class": "java.util.LinkedHashMap", - "java_class": "java.util.LinkedHashMap", - ... - } - }, - "status": 400 -} ---------------------------------------------------------- -// TESTRESPONSE[s/"root_cause": \.\.\./"root_cause": $body.error.root_cause/] -// TESTRESPONSE[s/\.\.\./"script_stack": $body.error.caused_by.script_stack, "script": $body.error.caused_by.script, "lang": $body.error.caused_by.lang, "position": $body.error.caused_by.position, "caused_by": $body.error.caused_by.caused_by, "reason": $body.error.caused_by.reason/] -// TESTRESPONSE[s/"to_string": ".+"/"to_string": $body.error.caused_by.to_string/] - -Once you have a class you can go to <> to see a list of -available methods. diff --git a/docs/painless/painless-guide/painless-execute-script.asciidoc b/docs/painless/painless-guide/painless-execute-script.asciidoc deleted file mode 100644 index 771a6818d45e8..0000000000000 --- a/docs/painless/painless-guide/painless-execute-script.asciidoc +++ /dev/null @@ -1,925 +0,0 @@ -[[painless-execute-api]] -=== Painless execute API -experimental::[] - -The Painless execute API runs a script and returns a result. 
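-
-For example, the following is a minimal request sketch using the default
-`painless_test` context; the sections below describe the request in detail and
-show fuller, context-specific examples. The response contains the computed
-value, converted to a string, in a `result` field.
-
-[source,console]
-----
-POST /_scripts/painless/_execute
-{
-  "script": {
-    "source": "params.a + params.b",
-    "params": {
-      "a": 1,
-      "b": 2
-    }
-  }
-}
-----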
- -[[painless-execute-api-request]] -==== {api-request-title} -`POST /_scripts/painless/_execute` - -[[painless-execute-api-desc]] -==== {api-description-title} -Use this API to build and test scripts, such as when defining a script for a -{ref}/runtime.html[runtime field]. This API requires very few dependencies, and is -especially useful if you don't have permissions to write documents on a cluster. - -The API uses several _contexts_, which control how scripts are executed, what -variables are available at runtime, and what the return type is. - -Each context requires a script, but additional parameters depend on the context -you're using for that script. - -[[painless-execute-api-request-body]] -==== {api-request-body-title} -`script`:: (Required, object) -The Painless script to execute. -+ -.Properties of `script` --- -include::../painless-contexts/painless-runtime-fields-context.asciidoc[tag=runtime-field-emit] --- - -[[_contexts]] -`context`:: (Optional, string) -The context that the script should run in. Defaults to `painless_test` if no -context is specified. -+ -.Properties of `context` -[%collapsible%open] -==== -`painless_test`:: -The default context if no other context is specified. See -<>. - -`filter`:: -Treats scripts as if they were run inside a `script` query. See -<>. - -`score`:: -Treats scripts as if they were run inside a `script_score` function in a -`function_score` query. See <>. - -[[painless-execute-runtime-context]] -.Field contexts -[%collapsible%open] -===== --- -The following options are specific to the field contexts. - -NOTE: Result ordering in the field contexts is not guaranteed. --- - -**** -`boolean_field`:: -The context for {ref}/boolean.html[`boolean` fields]. The script returns a `true` -or `false` response. See -<>. - -`date_field`:: -The context for {ref}/date.html[`date` fields]. `emit` takes a `long` value and -the script returns a sorted list of dates. See -<>. - -`double_field`:: -The context for `double` {ref}/number.html[numeric fields]. The script returns a -sorted list of `double` values. See -<>. - -`geo_point_field`:: -The context for {ref}/geo-point.html[`geo-point` fields]. `emit` takes two double -parameters, the latitude and longitude values, and the script returns an object in -GeoJSON format containing the coordinates for the geo point. See -<>. - -`ip_field`:: -The context for {ref}/ip.html[`ip` fields]. The script returns a sorted list of IP -addresses. See -<>. - -`keyword_field`:: -The context for {ref}/keyword.html[`keyword` fields]. The script returns a sorted -list of `string` values. See -<>. - -`long_field`:: -The context for `long` {ref}/number.html[numeric fields]. The script returns a -sorted list of `long` values. See <>. - -`composite_field`:: -The context for `composite` {ref}/runtime.html[runtime fields]. The script returns a -map of values. See <>. - -**** -===== -==== - -`context_setup`:: (Required, object) -Additional parameters for the `context`. -+ -NOTE: This parameter is required for all contexts except `painless_test`, -which is the default if no value is provided for `context`. -+ -.Properties of `context_setup` -[%collapsible%open] -==== -`document`:: (Required, string) -Document that's temporarily indexed in-memory and accessible from the script. - -`index`:: (Required, string) -Index containing a mapping that's compatible with the indexed document. -You may specify a remote index by prefixing the index with the remote cluster -alias. 
For example, `remote1:my_index` indicates that you want to execute -the painless script against the "my_index" index on the "remote1" cluster. This -request will be forwarded to the "remote1" cluster if you have -{ref}/remote-clusters-connect.html[configured a connection] to that remote cluster. - -NOTE: Wildcards are not accepted in the index expression for this endpoint. The -expression `*:myindex` will return the error "No such remote cluster" and the -expression `logs*` or `remote1:logs*` will return the error "index not found". -==== - -`params`:: (`Map`, read-only) -Specifies any named parameters that are passed into the script as variables. - -`query`:: (Optional, object) -NOTE: This parameter only applies when `score` is specified as the script -`context`. -+ -Use this parameter to specify a query for computing a score. Besides deciding -whether or not the document matches, the -{ref}/query-filter-context.html#query-context[query clause] also calculates a -relevance score in the `_score` metadata field. - -[[painless-execute-test]] -==== Test context -The `painless_test` context runs scripts without additional parameters. The only -variable that is available is `params`, which can be used to access user defined -values. The result of the script is always converted to a string. - -Because the default context is `painless_test`, you don't need to specify the -`context` or `context_setup`. - -===== Request - -[source,console] ----- -POST /_scripts/painless/_execute -{ - "script": { - "source": "params.count / params.total", - "params": { - "count": 100.0, - "total": 1000.0 - } - } -} ----- - -===== Response - -[source,console-result] ----- -{ - "result": "0.1" -} ----- - -[[painless-execute-filter-context]] -==== Filter context -The `filter` context treats scripts as if they were run inside a `script` query. -For testing purposes, a document must be provided so that it will be temporarily -indexed in-memory and is accessible from the script. More precisely, the -`_source`, stored fields and doc values of such a document are available to the -script being tested. - -===== Request - -[source,console] ----- -PUT /my-index-000001 -{ - "mappings": { - "properties": { - "field": { - "type": "keyword" - } - } - } -} ----- - -[source,console] ----- -POST /_scripts/painless/_execute -{ - "script": { - "source": "doc['field'].value.length() <= params.max_length", - "params": { - "max_length": 4 - } - }, - "context": "filter", - "context_setup": { - "index": "my-index-000001", - "document": { - "field": "four" - } - } -} ----- -// TEST[continued] - -===== Response - -[source,console-result] ----- -{ - "result": true -} ----- - -[[painless-execute-core-context]] -==== Score context -The `score` context treats scripts as if they were run inside a `script_score` -function in a `function_score` query. 
- -===== Request - -[source,console] ----- -PUT /my-index-000001 -{ - "mappings": { - "properties": { - "field": { - "type": "keyword" - }, - "rank": { - "type": "long" - } - } - } -} ----- - -[source,console] ----- -POST /_scripts/painless/_execute -{ - "script": { - "source": "doc['rank'].value / params.max_rank", - "params": { - "max_rank": 5.0 - } - }, - "context": "score", - "context_setup": { - "index": "my-index-000001", - "document": { - "rank": 4 - } - } -} ----- -// TEST[continued] - -===== Response - -[source,console-result] ----- -{ - "result": 0.8 -} ----- - -[[painless-execute-runtime-field-context]] -==== Field contexts -The field contexts treat scripts as if they were run inside the -{ref}/runtime-search-request.html[`runtime_mappings` section] of a search query. -You can use field contexts to test scripts for different field types, and then -include those scripts anywhere that they're supported, such as <>. - -Choose a field context based on the data type you want to return. - -[[painless-runtime-boolean]] -===== `boolean_field` -Use the `boolean_field` field context when you want to return a `true` -or `false` value from a script valuation. {ref}/boolean.html[Boolean fields] -accept `true` and `false` values, but can also accept strings that are -interpreted as either true or false. - -Let's say you have data for the top 100 science fiction books of all time. You -want to write scripts that return a boolean response such as whether books -exceed a certain page count, or if a book was published after a specific year. - -Consider that your data is structured like this: - -[source,console] ----- -PUT /my-index-000001 -{ - "mappings": { - "properties": { - "name": { - "type": "keyword" - }, - "author": { - "type": "keyword" - }, - "release_date": { - "type": "date" - }, - "page_count": { - "type": "double" - } - } - } -} ----- - -You can then write a script in the `boolean_field` context that indicates -whether a book was published before the year 1972: - -[source,console] ----- -POST /_scripts/painless/_execute -{ - "script": { - "source": """ - emit(doc['release_date'].value.year < 1972); - """ - }, - "context": "boolean_field", - "context_setup": { - "index": "my-index-000001", - "document": { - "name": "Dune", - "author": "Frank Herbert", - "release_date": "1965-06-01", - "page_count": 604 - } - } -} ----- -// TEST[continued] - -Because _Dune_ was published in 1965, the result returns as `true`: - -[source,console-result] ----- -{ - "result" : [ - true - ] -} ----- - -Similarly, you could write a script that determines whether the first name of -an author exceeds a certain number of characters. The following script operates -on the `author` field to determine whether the author's first name contains at -least one character, but is less than five characters: - -[source,console] ----- -POST /_scripts/painless/_execute -{ - "script": { - "source": """ - int space = doc['author'].value.indexOf(' '); - emit(space > 0 && space < 5); - """ - }, - "context": "boolean_field", - "context_setup": { - "index": "my-index-000001", - "document": { - "name": "Dune", - "author": "Frank Herbert", - "release_date": "1965-06-01", - "page_count": 604 - } - } -} ----- -// TEST[continued] - -Because `Frank` is five characters, the response returns `false` for the script -valuation: - -[source,console-result] ----- -{ - "result" : [ - false - ] -} ----- - -[[painless-runtime-datetime]] -===== `date_time` -Several options are available for using -<>. 
In this example, you'll -estimate when a particular author starting writing a book based on its release -date and the writing speed of that author. The example makes some assumptions, -but shows to write a script that operates on a date while incorporating -additional information. - -Add the following fields to your index mapping to get started: - -[source,console] ----- -PUT /my-index-000001 -{ - "mappings": { - "properties": { - "name": { - "type": "keyword" - }, - "author": { - "type": "keyword" - }, - "release_date": { - "type": "date" - }, - "page_count": { - "type": "long" - } - } - } -} ----- - -The following script makes the incredible assumption that when writing a book, -authors just write each page and don't do research or revisions. Further, the -script assumes that the average time it takes to write a page is eight hours. - -The script retrieves the `author` and makes another fantastic assumption to -either divide or multiply the `pageTime` value based on the author's perceived -writing speed (yet another wild assumption). - -The script subtracts the release date value (in milliseconds) from the -calculation of `pageTime` times the `page_count` to determine approximately -(based on numerous assumptions) when the author began writing the book. - -[source,console] ----- -POST /_scripts/painless/_execute -{ - "script": { - "source": """ - String author = doc['author'].value; - long pageTime = 28800000; <1> - if (author == 'Robert A. Heinlein') { - pageTime /= 2; <2> - } else if (author == 'Alastair Reynolds') { - pageTime *= 2; <3> - } - emit(doc['release_date'].value.toInstant().toEpochMilli() - pageTime * doc['page_count'].value); - """ - }, - "context": "date_field", - "context_setup": { - "index": "my-index-000001", - "document": { - "name": "Revelation Space", - "author": "Alastair Reynolds", - "release_date": "2000-03-15", - "page_count": 585 - } - } -} ----- -//TEST[continued] -<1> Eight hours, represented in milliseconds -<2> Incredibly fast writing from Robert A. Heinlein -<3> Alastair Reynolds writes space operas at a much slower speed - -In this case, the author is Alastair Reynolds. Based on a release date of -`2000-03-15`, the script calculates that the author started writing -`Revelation Space` on 19 February 1999. Writing a 585 page book in just over one -year is pretty impressive! - -[source,console-result] ----- -{ - "result" : [ - "1999-02-19T00:00:00.000Z" - ] -} ----- - -[[painless-runtime-double]] -===== `double_field` -Use the `double_field` context for {ref}/number.html[numeric data] of type -`double`. For example, let's say you have sensor data that includes a `voltage` -field with values like 5.6. After indexing millions of documents, you discover -that the sensor with model number `QVKC92Q` is under reporting its voltage by a -factor of 1.7. Rather than reindex your data, you can fix it with a -runtime field. - -You need to multiply this value, but only for -sensors that match a specific model number. - -Add the following fields to your index mapping. The `voltage` field is a -sub-field of the `measures` object. - -[source,console] ----- -PUT my-index-000001 -{ - "mappings": { - "properties": { - "@timestamp": { - "type": "date" - }, - "model_number": { - "type": "keyword" - }, - "measures": { - "properties": { - "voltage": { - "type": "double" - } - } - } - } - } -} ----- - -The following script matches on any documents where the `model_number` equals -`QVKC92Q`, and then multiplies the `voltage` value by `1.7`. 
This script is -useful when you want to select specific documents and only operate on values -that match the specified criteria. - -[source,console] ----- -POST /_scripts/painless/_execute -{ - "script": { - "source": """ - if (doc['model_number'].value.equals('QVKC92Q')) - {emit(1.7 * params._source['measures']['voltage']);} - else{emit(params._source['measures']['voltage']);} - """ - }, - "context": "double_field", - "context_setup": { - "index": "my-index-000001", - "document": { - "@timestamp": 1516470094000, - "model_number": "QVKC92Q", - "measures": { - "voltage": 5.6 - } - } - } -} ----- -// TEST[continued] - -The result includes the calculated voltage, which was determined by multiplying -the original value of `5.6` by `1.7`: - -[source,console-result] ----- -{ - "result" : [ - 9.52 - ] -} ----- - -[[painless-runtime-geo]] -===== `geo_point_field` -{ref}/geo-point.html[Geo-point] fields accept latitude-longitude pairs. You can -define a geo-point field in several ways, and include values for latitude and -longitude in the document for your script. - -If you already have a known geo-point, it's simpler to clearly state the -positions of `lat` and `lon` in your index mappings. - -[source,console] ----- -PUT /my-index-000001/ -{ - "mappings": { - "properties": { - "lat": { - "type": "double" - }, - "lon": { - "type": "double" - } - } - } -} ----- - -You can then use the `geo_point_field` runtime field context to write a script -that retrieves the `lat` and `lon` values. - -[source,console] ----- -POST /_scripts/painless/_execute -{ - "script": { - "source": """ - emit(doc['lat'].value, doc['lon'].value); - """ - }, - "context": "geo_point_field", - "context_setup": { - "index": "my-index-000001", - "document": { - "lat": 41.12, - "lon": -71.34 - } - } -} ----- -// TEST[continued] - -Because you're working with a geo-point field type, the response includes -results that are formatted as `coordinates`. - -[source,console-result] ----- -{ - "result" : [ - { - "coordinates" : [ - -71.34, - 41.12 - ], - "type" : "Point" - } - ] -} ----- - -[NOTE] -The emit function for {ref}/geo-point.html[geo-point] fields takes two parameters ordered with -`lat` before `lon`, but the output GeoJSON format orders the `coordinates` as `[ lon, lat ]`. - -[[painless-runtime-ip]] -===== `ip_field` -The `ip_field` context is useful for data that includes IP addresses of type -{ref}/ip.html[`ip`]. For example, let's say you have a `message` field from an Apache -log. This field contains an IP address, but also other data that you don't need. - -You can add the `message` field to your index mappings as a `wildcard` to accept -pretty much any data you want to put in that field. - -[source,console] ----- -PUT /my-index-000001/ -{ - "mappings": { - "properties": { - "message": { - "type": "wildcard" - } - } - } -} ----- - -You can then define a runtime script with a grok pattern that extracts -structured fields out of the `message` field. - -The script matches on the `%{COMMONAPACHELOG}` log pattern, which understands -the structure of Apache logs. If the pattern matches, the script emits the -value matching the IP address. If the pattern doesn’t match -(`clientip != null`), the script just returns the field value without crashing. 
- -[source,console] ----- -POST /_scripts/painless/_execute -{ - "script": { - "source": """ - String clientip=grok('%{COMMONAPACHELOG}').extract(doc["message"].value)?.clientip; - if (clientip != null) emit(clientip); - """ - }, - "context": "ip_field", - "context_setup": { - "index": "my-index-000001", - "document": { - "message": "40.135.0.0 - - [30/Apr/2020:14:30:17 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736" - } - } -} ----- -// TEST[continued] - -The response includes only the IP address, ignoring all of the other data in the -`message` field. - -[source,console-result] ----- -{ - "result" : [ - "40.135.0.0" - ] -} ----- - -[[painless-runtime-keyword]] -===== `keyword_field` -{ref}/keyword.html[Keyword fields] are often used in sorting, aggregations, and -term-level queries. - -Let's say you have a timestamp. You want to calculate the day of the week based -on that value and return it, such as `Thursday`. The following request adds a -`@timestamp` field of type `date` to the index mappings: - -[source,console] ----- -PUT /my-index-000001 -{ - "mappings": { - "properties": { - "@timestamp": { - "type": "date" - } - } - } -} ----- - -To return the equivalent day of week based on your timestamp, you can create a -script in the `keyword_field` runtime field context: - -[source,console] ----- -POST /_scripts/painless/_execute -{ - "script": { - "source": """ - emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); - """ - }, - "context": "keyword_field", - "context_setup": { - "index": "my-index-000001", - "document": { - "@timestamp": "2020-04-30T14:31:43-05:00" - } - } -} ----- -// TEST[continued] - -The script operates on the value provided for the `@timestamp` field to -calculate and return the day of the week: - -[source,console-result] ----- -{ - "result" : [ - "Thursday" - ] -} ----- - -[[painless-runtime-long]] -===== `long_field` -Let's say you have sensor data that a `measures` object. This object includes -a `start` and `end` field, and you want to calculate the difference between -those values. - -The following request adds a `measures` object to the mappings with two fields, -both of type `long`: - -[source,console] ----- -PUT /my-index-000001/ -{ - "mappings": { - "properties": { - "measures": { - "properties": { - "start": { - "type": "long" - }, - "end": { - "type": "long" - } - } - } - } - } -} ----- - -You can then define a script that assigns values to the `start` and `end` fields -and operate on them. The following script extracts the value for the `end` -field from the `measures` object and subtracts it from the `start` field: - -[source,console] ----- -POST /_scripts/painless/_execute -{ - "script": { - "source": """ - emit(doc['measures.end'].value - doc['measures.start'].value); - """ - }, - "context": "long_field", - "context_setup": { - "index": "my-index-000001", - "document": { - "measures": { - "voltage": "4.0", - "start": "400", - "end": "8625309" - } - } - } -} ----- -// TEST[continued] - -The response includes the calculated value from the script valuation: - -[source,console-result] ----- -{ - "result" : [ - 8624909 - ] -} ----- - -[[painless-runtime-composite]] -===== `composite_field` -Let's say you have logging data with a raw `message` field which you want to split -in multiple sub-fields that can be accessed separately. 
- -The following request adds a `message` field to the mappings of type `keyword`: - -[source,console] ----- -PUT /my-index-000001/ -{ - "mappings": { - "properties": { - "message": { - "type" : "keyword" - } - } - } -} ----- - -You can then define a script that splits such message field into subfields using -the grok function: - -[source,console] ----- -POST /_scripts/painless/_execute -{ - "script": { - "source": "emit(grok(\"%{COMMONAPACHELOG}\").extract(doc[\"message\"].value));" - }, - "context": "composite_field", - "context_setup": { - "index": "my-index-000001", - "document": { - "timestamp":"2020-04-30T14:31:27-05:00", - "message":"252.0.0.0 - - [30/Apr/2020:14:31:27 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736" - } - } -} ----- -// TEST[continued] - -The response includes the values that the script emitted: - -[source,console-result] ----- -{ - "result" : { - "composite_field.timestamp" : [ - "30/Apr/2020:14:31:27 -0500" - ], - "composite_field.auth" : [ - "-" - ], - "composite_field.response" : [ - "200" - ], - "composite_field.ident" : [ - "-" - ], - "composite_field.httpversion" : [ - "1.0" - ], - "composite_field.verb" : [ - "GET" - ], - "composite_field.bytes" : [ - "24736" - ], - "composite_field.clientip" : [ - "252.0.0.0" - ], - "composite_field.request" : [ - "/images/hm_bg.jpg" - ] - } -} ----- diff --git a/docs/painless/painless-guide/painless-ingest.asciidoc b/docs/painless/painless-guide/painless-ingest.asciidoc deleted file mode 100644 index 4a933786e69a4..0000000000000 --- a/docs/painless/painless-guide/painless-ingest.asciidoc +++ /dev/null @@ -1,119 +0,0 @@ -[[painless-ingest]] -=== Using ingest processors in Painless - -Some {ref}/processors.html[ingest processors] expose behavior through Painless -methods that can be called in Painless scripts that execute in ingest pipelines. - -==== Method usage - -All ingest methods available in Painless are scoped to the `Processors` -namespace. For example: - -[source,console] ----- -POST /_ingest/pipeline/_simulate?verbose -{ - "pipeline": { - "processors": [ - { - "script": { - "lang": "painless", - "source": """ - long bytes = Processors.bytes(ctx.size); - ctx.size_in_bytes = bytes; - """ - } - } - ] - }, - "docs": [ - { - "_source": { - "size": "1kb" - } - } - ] -} ----- - -==== Ingest methods reference - -===== Byte conversion -Use the {ref}/bytes-processor.html[bytes processor] to return the number of -bytes in the human-readable byte value supplied in the `value` parameter. - -[source,Painless] ----- -long bytes(String value); ----- - -===== Lowercase conversion -Use the {ref}/lowercase-processor.html[lowercase processor] to convert the -supplied string in the `value` parameter to its lowercase equivalent. - -[source,Painless] ----- -String lowercase(String value); ----- - -===== Uppercase conversion -Use the {ref}/uppercase-processor.html[uppercase processor] to convert the -supplied string in the `value` parameter to its uppercase equivalent. - -[source,Painless] ----- -String uppercase(String value); ----- - -===== JSON parsing -Use the {ref}/json-processor.html[JSON processor] to convert JSON strings to structured -JSON objects. The first `json` method accepts a map and a key. The processor converts -the JSON string in the map as specified by the `key` parameter to structured JSON content. -That content is added directly to the `map` object. - -The second `json` method accepts a JSON string in the `value` parameter and -returns a structured JSON object. 
- -[source,Painless] ----- -void json(Map map, String key); -Object json(Object value); ----- - -You can then add this object to the document through the context object: - -[source,Painless] ----- -Object json = Processors.json(ctx.inputJsonString); -ctx.structuredJson = json; ----- - -===== URL decoding -Use the {ref}/urldecode-processor.html[URL decode processor] to URL-decode the string -supplied in the `value` parameter. - -[source,Painless] ----- -String urlDecode(String value); ----- - -===== URI decomposition -Use the {ref}/uri-parts-processor.html[URI parts processor] to decompose the URI string -supplied in the `value` parameter. Returns a map of key-value pairs in which the key is -the name of the URI component such as `domain` or `path` and the value is the -corresponding value for that component. - -[source,Painless] ----- -String uriParts(String value); ----- - -===== Network community ID -Use the {ref}/community-id-processor.html[community ID processor] to compute the network -community ID for network flow data. - -[source,Painless] ----- -String communityId(String sourceIpAddrString, String destIpAddrString, Object ianaNumber, Object transport, Object sourcePort, Object destinationPort, Object icmpType, Object icmpCode, int seed) -String communityId(String sourceIpAddrString, String destIpAddrString, Object ianaNumber, Object transport, Object sourcePort, Object destinationPort, Object icmpType, Object icmpCode) ----- diff --git a/docs/painless/painless-guide/painless-method-dispatch.asciidoc b/docs/painless/painless-guide/painless-method-dispatch.asciidoc deleted file mode 100644 index dcb5a5b3cd1fb..0000000000000 --- a/docs/painless/painless-guide/painless-method-dispatch.asciidoc +++ /dev/null @@ -1,30 +0,0 @@ -[[modules-scripting-painless-dispatch]] -=== How painless dispatches functions - -Painless uses receiver, name, and {wikipedia}/Arity[arity] -for method dispatch. For example, `s.foo(a, b)` is resolved by first getting -the class of `s` and then looking up the method `foo` with two parameters. This -is different from Groovy which uses the -{wikipedia}/Multiple_dispatch[runtime types] of the -parameters and Java which uses the compile time types of the parameters. - -The consequence of this that Painless doesn't support overloaded methods like -Java, leading to some trouble when it allows classes from the Java -standard library. For example, in Java and Groovy, `Matcher` has two methods: -`group(int)` and `group(String)`. Painless can't allow both of these methods -because they have the same name and the same number of parameters. So instead it -has `group(int)` and `namedGroup(String)`. - -We have a few justifications for this different way of dispatching methods: - -1. It makes operating on `def` types simpler and, presumably, faster. Using -receiver, name, and arity means that when Painless sees a call on a `def` object it -can dispatch the appropriate method without having to do expensive comparisons -of the types of the parameters. The same is true for invocations with `def` -typed parameters. -2. It keeps things consistent. It would be genuinely weird for Painless to -behave like Groovy if any `def` typed parameters were involved and Java -otherwise. It'd be slow for it to behave like Groovy all the time. -3. It keeps Painless maintainable. Adding the Java or Groovy like method -dispatch *feels* like it'd add a ton of complexity which'd make maintenance and -other improvements much more difficult. 
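-
-As a brief sketch of the `Matcher` example above (assuming regular expressions
-are enabled), the renamed method is called explicitly rather than being
-resolved from the parameter type:
-
-[source,Painless]
-----
-Matcher m = /(?<year>\d{4})-(\d{2})/.matcher('1983-10');
-if (m.matches()) {
-  String year = m.namedGroup('year'); // Java's group(String)
-  String month = m.group(2);          // group(int)
-}
-----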
diff --git a/docs/painless/painless-guide/painless-runtime-fields.asciidoc b/docs/painless/painless-guide/painless-runtime-fields.asciidoc deleted file mode 100644 index 5252a89923889..0000000000000 --- a/docs/painless/painless-guide/painless-runtime-fields.asciidoc +++ /dev/null @@ -1,89 +0,0 @@ -[[painless-runtime-fields]] -=== Use Painless scripts in runtime fields -A runtime field is a field that is evaluated at query time. When you define a -runtime field, you can immediately use it in search requests, aggregations, -filtering, and sorting. - -When defining a runtime field, you can include a Painless script that is -evaluated at query time. This script has access to the entire context of a -document, including the original document {ref}/modules-scripting-fields.html[`_source` field] -and any mapped fields plus their values. At query time, the script runs and -generates values for each scripted field that is included in the query. - -You can map a runtime field in the `runtime` section under the mapping -definition, or define runtime fields that exist only as part of a search -request. The script syntax is the same, regardless of where you define the -runtime field. - -IMPORTANT: When defining a Painless script to use with runtime fields, you must -include `emit` to return calculated values. - -[discrete] -[[painless-runtime-fields-mapping]] -==== Define a runtime field in the mapping -Add a `runtime` section under the {ref}/runtime-mapping-fields.html[mapping definition] to explore your data without indexing fields. - -The script in the following request extracts the day of the week from the -`@timestamp` field, which is defined as a `date` type. The script calculates -the day of the week based on the value of `@timestamp`, and uses `emit` to -return the calculated value. - -[source,console] ----- -PUT my-index/ -{ - "mappings": { - "runtime": { - "day_of_week": { - "type": "keyword", - "script": { - "source": - """emit(doc['@timestamp'].value.dayOfWeekEnum - .getDisplayName(TextStyle.FULL, Locale.ROOT))""" - } - } - }, - "properties": { - "@timestamp": {"type": "date"} - } - } -} ----- - -[discrete] -[[painless-runtime-fields-query]] -==== Define a runtime field only in a search request -Use runtime fields in a search request to create a field that exists -{ref}/runtime-search-request.html[only as part of the query]. You can also {ref}/runtime-override-values.html[override field values] at query time for existing fields without -modifying the field itself. - -This flexibility allows you to experiment with your data schema and fix -mistakes in your index mapping without reindexing your data. 
- -In the following request, the values for the `day_of_week` field are calculated -dynamically, and only within the context of this search request: - -[source,console] ----- -GET my-index/_search -{ - "runtime_mappings": { - "day_of_week": { - "type": "keyword", - "script": { - "source": - """emit(doc['@timestamp'].value.dayOfWeekEnum - .getDisplayName(TextStyle.FULL, Locale.ROOT))""" - } - } - }, - "aggs": { - "day_of_week": { - "terms": { - "field": "day_of_week" - } - } - } -} ----- -//TEST[continued] diff --git a/docs/painless/painless-guide/painless-walkthrough.asciidoc b/docs/painless/painless-guide/painless-walkthrough.asciidoc deleted file mode 100644 index 38ccb68cbeec6..0000000000000 --- a/docs/painless/painless-guide/painless-walkthrough.asciidoc +++ /dev/null @@ -1,349 +0,0 @@ -[[painless-walkthrough]] -=== A Brief Painless Walkthrough - -To illustrate how Painless works, let's load some hockey stats into an Elasticsearch index: - -[source,console] ----------------------------------------------------------------- -PUT hockey/_bulk?refresh -{"index":{"_id":1}} -{"first":"johnny","last":"gaudreau","goals":[9,27,1],"assists":[17,46,0],"gp":[26,82,1],"born":"1993/08/13"} -{"index":{"_id":2}} -{"first":"sean","last":"monohan","goals":[7,54,26],"assists":[11,26,13],"gp":[26,82,82],"born":"1994/10/12"} -{"index":{"_id":3}} -{"first":"jiri","last":"hudler","goals":[5,34,36],"assists":[11,62,42],"gp":[24,80,79],"born":"1984/01/04"} -{"index":{"_id":4}} -{"first":"micheal","last":"frolik","goals":[4,6,15],"assists":[8,23,15],"gp":[26,82,82],"born":"1988/02/17"} -{"index":{"_id":5}} -{"first":"sam","last":"bennett","goals":[5,0,0],"assists":[8,1,0],"gp":[26,1,0],"born":"1996/06/20"} -{"index":{"_id":6}} -{"first":"dennis","last":"wideman","goals":[0,26,15],"assists":[11,30,24],"gp":[26,81,82],"born":"1983/03/20"} -{"index":{"_id":7}} -{"first":"david","last":"jones","goals":[7,19,5],"assists":[3,17,4],"gp":[26,45,34],"born":"1984/08/10"} -{"index":{"_id":8}} -{"first":"tj","last":"brodie","goals":[2,14,7],"assists":[8,42,30],"gp":[26,82,82],"born":"1990/06/07"} -{"index":{"_id":39}} -{"first":"mark","last":"giordano","goals":[6,30,15],"assists":[3,30,24],"gp":[26,60,63],"born":"1983/10/03"} -{"index":{"_id":10}} -{"first":"mikael","last":"backlund","goals":[3,15,13],"assists":[6,24,18],"gp":[26,82,82],"born":"1989/03/17"} -{"index":{"_id":11}} -{"first":"joe","last":"colborne","goals":[3,18,13],"assists":[6,20,24],"gp":[26,67,82],"born":"1990/01/30"} ----------------------------------------------------------------- -// TESTSETUP - -[discrete] -==== Accessing Doc Values from Painless - -Document values can be accessed from a `Map` named `doc`. - -For example, the following script calculates a player's total goals. This example uses a strongly typed `int` and a `for` loop. 
- -[source,console] ----------------------------------------------------------------- -GET hockey/_search -{ - "query": { - "function_score": { - "script_score": { - "script": { - "lang": "painless", - "source": """ - int total = 0; - for (int i = 0; i < doc['goals'].length; ++i) { - total += doc['goals'][i]; - } - return total; - """ - } - } - } - } -} ----------------------------------------------------------------- - -Alternatively, you could do the same thing using a script field instead of a function score: - -[source,console] ----------------------------------------------------------------- -GET hockey/_search -{ - "query": { - "match_all": {} - }, - "script_fields": { - "total_goals": { - "script": { - "lang": "painless", - "source": """ - int total = 0; - for (int i = 0; i < doc['goals'].length; ++i) { - total += doc['goals'][i]; - } - return total; - """ - } - } - } -} ----------------------------------------------------------------- - -The following example uses a Painless script to sort the players by their combined first and last names. The names are accessed using -`doc['first'].value` and `doc['last'].value`. - -[source,console] ----------------------------------------------------------------- -GET hockey/_search -{ - "query": { - "match_all": {} - }, - "sort": { - "_script": { - "type": "string", - "order": "asc", - "script": { - "lang": "painless", - "source": "doc['first.keyword'].value + ' ' + doc['last.keyword'].value" - } - } - } -} ----------------------------------------------------------------- - -[discrete] -==== Missing keys - -`doc['myfield'].value` throws an exception if -the field is missing in a document. - -For more dynamic index mappings, you may consider writing a catch equation - -``` -if (!doc.containsKey('myfield') || doc['myfield'].empty) { return "unavailable" } else { return doc['myfield'].value } -``` - -[discrete] -==== Missing values - -To check if a document is missing a value, you can call -`doc['myfield'].size() == 0`. - -[discrete] -==== Updating Fields with Painless - -You can also easily update fields. You access the original source for a field as `ctx._source.`. - -First, let's look at the source data for a player by submitting the following request: - -[source,console] ----------------------------------------------------------------- -GET hockey/_search -{ - "query": { - "term": { - "_id": 1 - } - } -} ----------------------------------------------------------------- - -To change player 1's last name to `hockey`, simply set `ctx._source.last` to the new value: - -[source,console] ----------------------------------------------------------------- -POST hockey/_update/1 -{ - "script": { - "lang": "painless", - "source": "ctx._source.last = params.last", - "params": { - "last": "hockey" - } - } -} ----------------------------------------------------------------- - -You can also add fields to a document. For example, this script adds a new field that contains -the player's nickname, _hockey_. - -[source,console] ----------------------------------------------------------------- -POST hockey/_update/1 -{ - "script": { - "lang": "painless", - "source": """ - ctx._source.last = params.last; - ctx._source.nick = params.nick - """, - "params": { - "last": "gaudreau", - "nick": "hockey" - } - } -} ----------------------------------------------------------------- - -[discrete] -[[modules-scripting-painless-dates]] -==== Dates - -Date fields are exposed as -`ZonedDateTime`, so they support methods like `getYear`, `getDayOfWeek` -or e.g. 
getting milliseconds since epoch with `getMillis`. To use these -in a script, leave out the `get` prefix and continue with lowercasing the -rest of the method name. For example, the following returns every hockey -player's birth year: - -[source,console] ----------------------------------------------------------------- -GET hockey/_search -{ - "script_fields": { - "birth_year": { - "script": { - "source": "doc.born.value.year" - } - } - } -} ----------------------------------------------------------------- - -[discrete] -[[modules-scripting-painless-regex]] -==== Regular expressions - -NOTE: Regexes are enabled by default as the Setting `script.painless.regex.enabled` -has a new option, `limited`, the default. This defaults to using regular expressions -but limiting the complexity of the regular expressions. Innocuous looking regexes -can have staggering performance and stack depth behavior. But still, they remain an -amazingly powerful tool. In addition, to `limited`, the setting can be set to `true`, -as before, which enables regular expressions without limiting them.To enable them -yourself set `script.painless.regex.enabled: true` in `elasticsearch.yml`. - -Painless's native support for regular expressions has syntax constructs: - -* `/pattern/`: Pattern literals create patterns. This is the only way to create -a pattern in painless. The pattern inside the ++/++'s are just -https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html[Java regular expressions]. -See <> for more. -* `=~`: The find operator return a `boolean`, `true` if a subsequence of the -text matches, `false` otherwise. -* `==~`: The match operator returns a `boolean`, `true` if the text matches, -`false` if it doesn't. - -Using the find operator (`=~`) you can update all hockey players with "b" in -their last name: - -[source,console] ----------------------------------------------------------------- -POST hockey/_update_by_query -{ - "script": { - "lang": "painless", - "source": """ - if (ctx._source.last =~ /b/) { - ctx._source.last += "matched"; - } else { - ctx.op = "noop"; - } - """ - } -} ----------------------------------------------------------------- - -Using the match operator (`==~`) you can update all the hockey players whose -names start with a consonant and end with a vowel: - -[source,console] ----------------------------------------------------------------- -POST hockey/_update_by_query -{ - "script": { - "lang": "painless", - "source": """ - if (ctx._source.last ==~ /[^aeiou].*[aeiou]/) { - ctx._source.last += "matched"; - } else { - ctx.op = "noop"; - } - """ - } -} ----------------------------------------------------------------- - -You can use the `Pattern.matcher` directly to get a `Matcher` instance and -remove all of the vowels in all of their last names: - -[source,console] ----------------------------------------------------------------- -POST hockey/_update_by_query -{ - "script": { - "lang": "painless", - "source": "ctx._source.last = /[aeiou]/.matcher(ctx._source.last).replaceAll('')" - } -} ----------------------------------------------------------------- - -`Matcher.replaceAll` is just a call to Java's `Matcher`'s -https://docs.oracle.com/javase/8/docs/api/java/util/regex/Matcher.html#replaceAll-java.lang.String-[replaceAll] -method so it supports `$1` and `\1` for replacements: - -[source,console] ----------------------------------------------------------------- -POST hockey/_update_by_query -{ - "script": { - "lang": "painless", - "source": "ctx._source.last = 
/n([aeiou])/.matcher(ctx._source.last).replaceAll('$1')" - } -} ----------------------------------------------------------------- - -If you need more control over replacements you can call `replaceAll` on a -`CharSequence` with a `Function` that builds the replacement. -This does not support `$1` or `\1` to access replacements because you already -have a reference to the matcher and can get them with `m.group(1)`. - -IMPORTANT: Calling `Matcher.find` inside of the function that builds the -replacement is rude and will likely break the replacement process. - -This will make all of the vowels in the hockey player's last names upper case: - -[source,console] ----------------------------------------------------------------- -POST hockey/_update_by_query -{ - "script": { - "lang": "painless", - "source": """ - ctx._source.last = ctx._source.last.replaceAll(/[aeiou]/, m -> - m.group().toUpperCase(Locale.ROOT)) - """ - } -} ----------------------------------------------------------------- - -Or you can use the `CharSequence.replaceFirst` to make the first vowel in their -last names upper case: - -[source,console] ----------------------------------------------------------------- -POST hockey/_update_by_query -{ - "script": { - "lang": "painless", - "source": """ - ctx._source.last = ctx._source.last.replaceFirst(/[aeiou]/, m -> - m.group().toUpperCase(Locale.ROOT)) - """ - } -} ----------------------------------------------------------------- - -Note: all of the `_update_by_query` examples above could really do with a -`query` to limit the data that they pull back. While you *could* use a -{ref}/query-dsl-script-query.html[script query] it wouldn't be as efficient -as using any other query because script queries aren't able to use the inverted -index to limit the documents that they have to check. diff --git a/docs/painless/painless-lang-spec.asciidoc b/docs/painless/painless-lang-spec.asciidoc deleted file mode 100644 index aeb1a9d4c753d..0000000000000 --- a/docs/painless/painless-lang-spec.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -[[painless-lang-spec]] -== Painless Language Specification - -Painless is a scripting language designed for security and performance. -Painless syntax is similar to Java syntax along with some additional -features such as dynamic typing, Map and List accessor shortcuts, and array -initializers. As a direct comparison to Java, there are some important -differences, especially related to the casting model. For more detailed -conceptual information about the basic constructs that Painless and Java share, -refer to the corresponding topics in the -https://docs.oracle.com/javase/specs/jls/se8/html/index.html[Java Language -Specification]. - -Painless scripts are parsed and compiled using the https://www.antlr.org/[ANTLR4] -and https://asm.ow2.org/[ASM] libraries. Scripts are compiled directly -into Java Virtual Machine (JVM) byte code and executed against a standard JVM. -This specification uses ANTLR4 grammar notation to describe the allowed syntax. -However, the actual Painless grammar is more compact than what is shown here. 
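To ground the features listed above (dynamic typing, Map and List accessor shortcuts, and array initializers) before the per-topic chapters, here is a brief illustrative sketch. It is an editorial example rather than part of the original specification, and every identifier in it is hypothetical.

[source,Painless]
----
// dynamic typing: the type `d` represents can change between assignments
def d = 10;
d = 'now a String';

// Map and List accessor shortcuts: brackets instead of get/put calls
Map m = new HashMap();
m['greeting'] = 'hello';        // shortcut for m.put('greeting', 'hello')
List l = new ArrayList();
l.add(1);
int first = l[0];               // shortcut for l.get(0)

// array initializer
int[] counts = new int[] {1, 2, 3};
----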
- -include::painless-lang-spec/index.asciidoc[] diff --git a/docs/painless/painless-lang-spec/index.asciidoc b/docs/painless/painless-lang-spec/index.asciidoc deleted file mode 100644 index e75264ff3e4e1..0000000000000 --- a/docs/painless/painless-lang-spec/index.asciidoc +++ /dev/null @@ -1,35 +0,0 @@ -include::painless-comments.asciidoc[] - -include::painless-keywords.asciidoc[] - -include::painless-literals.asciidoc[] - -include::painless-identifiers.asciidoc[] - -include::painless-variables.asciidoc[] - -include::painless-types.asciidoc[] - -include::painless-casting.asciidoc[] - -include::painless-operators.asciidoc[] - -include::painless-operators-general.asciidoc[] - -include::painless-operators-numeric.asciidoc[] - -include::painless-operators-boolean.asciidoc[] - -include::painless-operators-reference.asciidoc[] - -include::painless-operators-array.asciidoc[] - -include::painless-statements.asciidoc[] - -include::painless-scripts.asciidoc[] - -include::painless-functions.asciidoc[] - -include::painless-lambdas.asciidoc[] - -include::painless-regexes.asciidoc[] diff --git a/docs/painless/painless-lang-spec/painless-casting.asciidoc b/docs/painless/painless-lang-spec/painless-casting.asciidoc deleted file mode 100644 index 48a82734507ed..0000000000000 --- a/docs/painless/painless-lang-spec/painless-casting.asciidoc +++ /dev/null @@ -1,536 +0,0 @@ -[[painless-casting]] -=== Casting - -A cast converts the value of an original type to the equivalent value of a -target type. An implicit cast infers the target type and automatically occurs -during certain <>. An explicit cast specifies -the target type and forcefully occurs as its own operation. Use the `cast -operator '()'` to specify an explicit cast. - -Refer to the <> for a quick reference on all -allowed casts. - -*Errors* - -* If during a cast there exists no equivalent value for the target type. -* If an implicit cast is given, but an explicit cast is required. - -*Grammar* - -[source,ANTLR4] ----- -cast: '(' TYPE ')' expression ----- - -*Examples* - -* Valid casts. -+ -[source,Painless] ----- -int i = (int)5L; <1> -Map m = new HashMap(); <2> -HashMap hm = (HashMap)m; <3> ----- -+ -<1> declare `int i`; - explicit cast `long 5` to `int 5` -> `int 5`; - store `int 5` to `i` -<2> declare `Map m`; - allocate `HashMap` instance -> `HashMap reference`; - implicit cast `HashMap reference` to `Map reference` -> `Map reference`; - store `Map reference` to `m` -<3> declare `HashMap hm`; - load from `m` -> `Map reference`; - explicit cast `Map reference` to `HashMap reference` -> `HashMap reference`; - store `HashMap reference` to `hm` - -[[numeric-type-casting]] -==== Numeric Type Casting - -A <> cast converts the value of an original -numeric type to the equivalent value of a target numeric type. A cast between -two numeric type values results in data loss when the value of the original -numeric type is larger than the target numeric type can accommodate. A cast -between an integer type value and a floating point type value can result in -precision loss. 
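The following short sketch (an editorial illustration, not part of the original reference) shows the data-loss and precision-loss cases described above; as far as numeric conversions go, the behavior mirrors Java's narrowing and widening rules.

[source,Painless]
----
long big = 4000000000L;    // larger than Integer.MAX_VALUE
int narrowed = (int)big;   // explicit narrowing cast compiles, but the high-order bits are lost

int exact = 16777217;      // 2^24 + 1
float f = exact;           // implicit int-to-float cast; the value rounds to 16777216.0

double d = 3.99;
int truncated = (int)d;    // explicit cast discards the fractional part -> int 3
----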
- -The allowed casts for values of each numeric type are shown as a row in the -following table: - -|==== -| | byte | short | char | int | long | float | double -| byte | | implicit | implicit | implicit | implicit | implicit | implicit -| short | explicit | | explicit | implicit | implicit | implicit | implicit -| char | explicit | explicit | | implicit | implicit | implicit | implicit -| int | explicit | explicit | explicit | | implicit | implicit | implicit -| long | explicit | explicit | explicit | explicit | | implicit | implicit -| float | explicit | explicit | explicit | explicit | explicit | | implicit -| double | explicit | explicit | explicit | explicit | explicit | explicit | -|==== - -*Examples* - -* Valid numeric type casts. -+ -[source,Painless] ----- -int a = 1; <1> -long b = a; <2> -short c = (short)b; <3> -double e = (double)a; <4> ----- -+ -<1> declare `int a`; - store `int 1` to `a` -<2> declare `long b`; - load from `a` -> `int 1`; - implicit cast `int 1` to `long 1` -> `long 1`; - store `long 1` to `b` -<3> declare `short c`; - load from `b` -> `long 1`; - explicit cast `long 1` to `short 1` -> `short 1`; - store `short 1` value to `c` -<4> declare `double e`; - load from `a` -> `int 1`; - explicit cast `int 1` to `double 1.0`; - store `double 1.0` to `e`; - (note the explicit cast is extraneous since an implicit cast is valid) -+ -* Invalid numeric type casts resulting in errors. -+ -[source,Painless] ----- -int a = 1.0; // error <1> -int b = 2; <2> -byte c = b; // error <3> ----- -+ -<1> declare `int i`; - *error* -> cannot implicit cast `double 1.0` to `int 1`; - (note an explicit cast is valid) -<2> declare `int b`; - store `int 2` to `b` -<3> declare byte `c`; - load from `b` -> `int 2`; - *error* -> cannot implicit cast `int 2` to `byte 2`; - (note an explicit cast is valid) - -[[reference-type-casting]] -==== Reference Type Casting - -A <> cast converts the value of an original -reference type to the equivalent value of a target reference type. An implicit -cast between two reference type values is allowed when the original reference -type is a descendant of the target type. An explicit cast between two reference -type values is allowed when the original type is a descendant of the target type -or the target type is a descendant of the original type. - -*Examples* - -* Valid reference type casts. -+ -[source,Painless] ----- -List x; <1> -ArrayList y = new ArrayList(); <2> -x = y; <3> -y = (ArrayList)x; <4> -x = (List)y; <5> ----- -+ -<1> declare `List x`; - store default value `null` to `x` -<2> declare `ArrayList y`; - allocate `ArrayList` instance -> `ArrayList reference`; - store `ArrayList reference` to `y`; -<3> load from `y` -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `x`; - (note `ArrayList` is a descendant of `List`) -<4> load from `x` -> `List reference`; - explicit cast `List reference` to `ArrayList reference` - -> `ArrayList reference`; - store `ArrayList reference` to `y`; -<5> load from `y` -> `ArrayList reference`; - explicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `x`; - (note the explicit cast is extraneous, and an implicit cast is valid) -+ -* Invalid reference type casts resulting in errors. 
-+ -[source,Painless] ----- -List x = new ArrayList(); <1> -ArrayList y = x; // error <2> -Map m = (Map)x; // error <3> ----- -+ -<1> declare `List x`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `x` -<2> declare `ArrayList y`; - load from `x` -> `List reference`; - *error* -> cannot implicit cast `List reference` to `ArrayList reference`; - (note an explicit cast is valid since `ArrayList` is a descendant of `List`) -<3> declare `ArrayList y`; - load from `x` -> `List reference`; - *error* -> cannot explicit cast `List reference` to `Map reference`; - (note no cast is valid since neither `List` nor `Map` is a descendant of the - other) - -[[dynamic-type-casting]] -==== Dynamic Type Casting - -A <> cast converts the value of an original -`def` type to the equivalent value of any target type or converts the value of -any original type to the equivalent value of a target `def` type. - -An implicit cast from any original type value to a `def` type value is always -allowed. An explicit cast from any original type value to a `def` type value is -always allowed but never necessary. - -An implicit or explicit cast from an original `def` type value to -any target type value is allowed if and only if the cast is normally allowed -based on the current type value the `def` type value represents. - -*Examples* - -* Valid dynamic type casts with any original type to a target `def` type. -+ -[source,Painless] ----- -def d0 = 3; <1> -d0 = new ArrayList(); <2> -Object o = new HashMap(); <3> -def d1 = o; <4> -int i = d1.size(); <5> ----- -+ -<1> declare `def d0`; - implicit cast `int 3` to `def`; - store `int 3` to `d0` -<2> allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `def` -> `def`; - store `def` to `d0` -<3> declare `Object o`; - allocate `HashMap` instance -> `HashMap reference`; - implicit cast `HashMap reference` to `Object reference` - -> `Object reference`; - store `Object reference` to `o` -<4> declare `def d1`; - load from `o` -> `Object reference`; - implicit cast `Object reference` to `def` -> `def`; - store `def` to `d1` -<5> declare `int i`; - load from `d1` -> `def`; - implicit cast `def` to `HashMap reference` -> HashMap reference`; - call `size` on `HashMap reference` -> `int 0`; - store `int 0` to `i`; - (note `def` was implicit cast to `HashMap reference` since `HashMap` is the - child-most descendant type value that the `def` type value - represents) -+ -* Valid dynamic type casts with an original `def` type to any target type. 
-+ -[source,Painless] ----- -def d = 1.0; <1> -int i = (int)d; <2> -d = 1; <3> -float f = d; <4> -d = new ArrayList(); <5> -List l = d; <6> ----- -+ -<1> declare `def d`; - implicit cast `double 1.0` to `def` -> `def`; - store `def` to `d` -<2> declare `int i`; - load from `d` -> `def`; - implicit cast `def` to `double 1.0` -> `double 1.0`; - explicit cast `double 1.0` to `int 1` -> `int 1`; - store `int 1` to `i`; - (note the explicit cast is necessary since a `double` type value is not - converted to an `int` type value implicitly) -<3> store `int 1` to `d`; - (note the switch in the type `d` represents from `double` to `int`) -<4> declare `float i`; - load from `d` -> `def`; - implicit cast `def` to `int 1` -> `int 1`; - implicit cast `int 1` to `float 1.0` -> `float 1.0`; - store `float 1.0` to `f` -<5> allocate `ArrayList` instance -> `ArrayList reference`; - store `ArrayList reference` to `d`; - (note the switch in the type `d` represents from `int` to `ArrayList`) -<6> declare `List l`; - load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `l` -+ -* Invalid dynamic type casts resulting in errors. -+ -[source,Painless] ----- -def d = 1; <1> -short s = d; // error <2> -d = new HashMap(); <3> -List l = d; // error <4> ----- -<1> declare `def d`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `d` -<2> declare `short s`; - load from `d` -> `def`; - implicit cast `def` to `int 1` -> `int 1`; - *error* -> cannot implicit cast `int 1` to `short 1`; - (note an explicit cast is valid) -<3> allocate `HashMap` instance -> `HashMap reference`; - implicit cast `HashMap reference` to `def` -> `def`; - store `def` to `d` -<4> declare `List l`; - load from `d` -> `def`; - implicit cast `def` to `HashMap reference`; - *error* -> cannot implicit cast `HashMap reference` to `List reference`; - (note no cast is valid since neither `HashMap` nor `List` is a descendant of - the other) - -[[string-character-casting]] -==== String to Character Casting - -Use the cast operator to convert a <> value into a -<> value. - -*Errors* - -* If the `String` type value isn't one character in length. -* If the `String` type value is `null`. - -*Examples* - -* Casting string literals into `char` type values. -+ -[source,Painless] ----- -char c = (char)"C"; <1> -c = (char)'c'; <2> ----- -+ -<1> declare `char c`; - explicit cast `String "C"` to `char C` -> `char C`; - store `char C` to `c` -<2> explicit cast `String 'c'` to `char c` -> `char c`; - store `char c` to `c` -+ -* Casting a `String` reference into a `char` type value. -+ -[source,Painless] ----- -String s = "s"; <1> -char c = (char)s; <2> ----- -<1> declare `String s`; - store `String "s"` to `s`; -<2> declare `char c` - load from `s` -> `String "s"`; - explicit cast `String "s"` to `char s` -> `char s`; - store `char s` to `c` - -[[character-string-casting]] -==== Character to String Casting - -Use the cast operator to convert a <> value into a -<> value. - -*Examples* - -* Casting a `String` reference into a `char` type value. 
-+ -[source,Painless] ----- -char c = 65; <1> -String s = (String)c; <2> ----- -<1> declare `char c`; - store `char 65` to `c`; -<2> declare `String s` - load from `c` -> `char A`; - explicit cast `char A` to `String "A"` -> `String "A"`; - store `String "A"` to `s` - -[[boxing-unboxing]] -==== Boxing and Unboxing - -Boxing is a special type of cast used to convert a primitive type to its -corresponding reference type. Unboxing is the reverse used to convert a -reference type to its corresponding primitive type. - -Implicit boxing/unboxing occurs during the following operations: - -* Conversions between a `def` type and a primitive type are implicitly - boxed/unboxed as necessary, though this is referred to as an implicit cast - throughout the documentation. -* Method/function call arguments are implicitly boxed/unboxed as necessary. -* A primitive type value is implicitly boxed when a reference type method - is called on it. - -Explicit boxing/unboxing is not allowed. Use the reference type API to -explicitly convert a primitive type value to its respective reference type -value and vice versa. - -*Errors* - -* If an explicit cast is made to box/unbox a primitive type. - -*Examples* - -* Uses of implicit boxing/unboxing. -+ -[source,Painless] ----- -List l = new ArrayList(); <1> -l.add(1); <2> -Integer I = Integer.valueOf(0); <3> -int i = l.get(i); <4> ----- -+ -<1> declare `List l`; - allocate `ArrayList` instance -> `ArrayList reference`; - store `ArrayList reference` to `l`; -<2> load from `l` -> `List reference`; - implicit cast `int 1` to `def` -> `def`; - call `add` on `List reference` with arguments (`def`); - (note internally `int 1` is boxed to `Integer 1` to store as a `def` type - value) -<3> declare `Integer I`; - call `valueOf` on `Integer` with arguments of (`int 0`) -> `Integer 0`; - store `Integer 0` to `I`; -<4> declare `int i`; - load from `I` -> `Integer 0`; - unbox `Integer 0` -> `int 0`; - load from `l` -> `List reference`; - call `get` on `List reference` with arguments (`int 0`) -> `def`; - implicit cast `def` to `int 1` -> `int 1`; - store `int 1` to `i`; - (note internally `int 1` is unboxed from `Integer 1` when loaded from a - `def` type value) -+ -* Uses of invalid boxing/unboxing resulting in errors. -+ -[source,Painless] ----- -Integer x = 1; // error <1> -Integer y = (Integer)1; // error <2> -int a = Integer.valueOf(1); // error <3> -int b = (int)Integer.valueOf(1); // error <4> ----- -+ -<1> declare `Integer x`; - *error* -> cannot implicit box `int 1` to `Integer 1` during assignment -<2> declare `Integer y`; - *error* -> cannot explicit box `int 1` to `Integer 1` during assignment -<3> declare `int a`; - call `valueOf` on `Integer` with arguments of (`int 1`) -> `Integer 1`; - *error* -> cannot implicit unbox `Integer 1` to `int 1` during assignment -<4> declare `int a`; - call `valueOf` on `Integer` with arguments of (`int 1`) -> `Integer 1`; - *error* -> cannot explicit unbox `Integer 1` to `int 1` during assignment - -[[promotion]] -==== Promotion - -Promotion is when a single value is implicitly cast to a certain type or -multiple values are implicitly cast to the same type as required for evaluation -by certain operations. Each operation that requires promotion has a promotion -table that shows all required implicit casts based on the type(s) of value(s). A -value promoted to a `def` type at compile-time is promoted again at run-time -based on the type the `def` value represents. 
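As a brief editorial illustration (in addition to the fuller examples below), promotion is what determines the result type of a mixed-type expression: two `int` operands keep the operation in `int`, while mixing in a `long` or `double` operand promotes the whole expression.

[source,Painless]
----
int half = 1 / 2;          // both operands promote to int -> integer division -> int 0
double ratio = 1 / 2.0;    // int 1 is promoted to double 1.0 -> double 0.5
double sum = 1 + 2L + 0.5; // promotes to long for the first add, then to double -> double 3.5
----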
- -*Errors* - -* If a specific operation cannot find an allowed promotion type for the type(s) - of value(s) given. - -*Examples* - -* Uses of promotion. -+ -[source,Painless] ----- -double d = 2 + 2.0; <1> -def x = 1; <2> -float f = x + 2.0F; <3> ----- -<1> declare `double d`; - promote `int 2` and `double 2.0 @0`: result `double`; - implicit cast `int 2` to `double 2.0 @1` -> `double 2.0 @1`; - add `double 2.0 @1` and `double 2.0 @0` -> `double 4.0`; - store `double 4.0` to `d` -<2> declare `def x`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `x`; -<3> declare `float f`; - load from `x` -> `def`; - implicit cast `def` to `int 1` -> `int 1`; - promote `int 1` and `float 2.0`: result `float`; - implicit cast `int 1` to `float 1.0` -> `float `1.0`; - add `float 1.0` and `float 2.0` -> `float 3.0`; - store `float 3.0` to `f`; - (note this example illustrates promotion done at run-time as promotion - done at compile-time would have resolved to a `def` type value) - -[[allowed-casts]] -==== Allowed Casts - -The following tables show all allowed casts. Read the tables row by row, where -the original type is shown in the first column, and each subsequent column -indicates whether a cast to the specified target type is implicit (I), -explicit (E), boxed/unboxed for methods only (A), a reference type cast (@), -or is not allowed (-). See <> -for allowed reference type casts. - -*Primitive/Reference Types* - -[cols="<3,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | O | N | T | b | y | s | c | i | j | f | d | B | Y | S | C | I | J | F | D | R | def -| Object ( O ) | | @ | @ | - | - | - | - | - | - | - | - | @ | @ | @ | @ | @ | @ | @ | @ | @ | I -| Number ( N ) | I | | - | - | - | - | - | - | - | - | - | - | @ | @ | - | @ | @ | @ | @ | @ | I -| String ( T ) | I | - | | - | - | - | - | - | - | - | - | - | - | - | E | - | - | - | - | - | I -| boolean ( b ) | A | - | - | | - | - | - | - | - | - | - | A | - | - | - | - | - | - | - | - | I -| byte ( y ) | A | A | - | - | | I | E | I | I | I | I | - | A | A | - | A | A | A | A | - | I -| short ( s ) | A | A | - | - | E | | E | I | I | I | I | - | - | A | - | A | A | A | A | - | I -| char ( c ) | A | - | E | - | E | E | | I | I | I | I | - | - | - | A | A | A | A | A | - | I -| int ( i ) | A | A | - | - | E | E | E | | I | I | I | - | - | - | - | A | A | A | A | - | I -| long ( j ) | A | A | - | - | E | E | E | E | | I | I | - | - | - | - | - | A | A | A | - | I -| float ( f ) | A | A | - | - | E | E | E | E | E | | I | - | - | - | - | - | - | A | A | - | I -| double ( d ) | A | A | - | - | E | E | E | E | E | E | | - | - | - | - | - | - | - | A | - | I -| Boolean ( B ) | A | - | - | A | - | - | - | - | - | - | - | | - | - | - | - | - | - | - | @ | I -| Byte ( Y ) | A | I | - | - | A | A | - | A | A | A | A | - | | A | - | A | A | A | A | @ | I -| Short ( S ) | A | I | - | - | - | A | - | A | A | A | A | - | - | | - | A | A | A | A | @ | I -| Character ( C ) | A | - | - | - | - | - | A | A | A | A | A | - | - | - | | A | A | A | A | @ | I -| Integer ( I ) | A | - | - | - | - | - | - | A | A | A | A | - | - | - | - | | A | A | A | @ | I -| Long ( J ) | A | - | - | - | - | - | - | - | A | A | A | - | - | - | - | - | | A | A | @ | I -| Float ( F ) | A | - | - | - | - | - | - | - | - | A | A | - | - | - | - | - | - | | A | @ | I -| Double ( D ) | A | - | - | - | - | - | - | - | - | - | A | - | - | - | - | - | - | - | | @ | I -| Reference ( R ) | I | @ | @ | - | - | - | - | - | - | - | - | @ | @ | @ | @ | @ | @ | @ | 
@ | @ | I -|==== - -*`def` Type* - -[cols="<3,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | O | N | T | b | y | s | c | i | j | f | d | B | Y | S | C | I | J | F | D | R -| def as String | I | - | I | - | - | - | E | - | - | - | - | - | - | - | E | - | - | - | - | @ -| def as boolean/Boolean | I | - | - | I | - | - | - | - | - | - | - | I | - | - | - | - | - | - | - | @ -| def as byte/Byte | I | - | - | - | I | I | E | I | I | I | I | - | I | I | E | I | I | I | I | @ -| def as short/Short | I | - | - | - | E | I | E | I | I | I | I | - | E | I | E | I | I | I | I | @ -| def as char/Character | I | - | - | - | E | E | I | I | I | I | I | - | E | E | I | I | I | I | I | @ -| def as int/Integer | I | - | - | - | E | E | E | I | I | I | I | - | E | E | E | I | I | I | I | @ -| def as long/Long | I | - | - | - | E | E | E | E | I | I | I | - | E | E | E | E | I | I | I | @ -| def as float/Float | I | - | - | - | E | E | E | E | E | I | I | - | E | E | E | E | E | I | I | @ -| def as double/Double | I | - | - | - | E | E | E | E | E | E | I | - | E | E | E | E | E | E | I | @ -| def as Reference | @ | @ | @ | - | - | - | - | - | - | - | - | @ | @ | @ | @ | @ | @ | @ | @ | @ -|==== diff --git a/docs/painless/painless-lang-spec/painless-comments.asciidoc b/docs/painless/painless-lang-spec/painless-comments.asciidoc deleted file mode 100644 index bfd3594431ebd..0000000000000 --- a/docs/painless/painless-lang-spec/painless-comments.asciidoc +++ /dev/null @@ -1,52 +0,0 @@ -[[painless-comments]] -=== Comments - -Use a comment to annotate or explain code within a script. Use the `//` token -anywhere on a line to specify a single-line comment. All characters from the -`//` token to the end of the line are ignored. Use an opening `/*` token and a -closing `*/` token to specify a multi-line comment. Multi-line comments can -start anywhere on a line, and all characters in between the `/*` token and `*/` -token are ignored. A comment is included anywhere within a script. - -*Grammar* - -[source,ANTLR4] ----- -SINGLE_LINE_COMMENT: '//' .*? [\n\r]; -MULTI_LINE_COMMENT: '/*' .*? '*/'; ----- - -*Examples* - -* Single-line comments. -+ -[source,Painless] ----- -// single-line comment - -int value; // single-line comment ----- -+ -* Multi-line comments. -+ -[source,Painless] ----- -/* multi- - line - comment */ - -int value; /* multi- - line - comment */ value = 0; - -int value; /* multi-line - comment */ - -/* multi-line - comment */ int value; - -int value; /* multi-line - comment */ value = 0; - -int value; /* multi-line comment */ value = 0; ----- diff --git a/docs/painless/painless-lang-spec/painless-functions.asciidoc b/docs/painless/painless-lang-spec/painless-functions.asciidoc deleted file mode 100644 index 535f3b94ea308..0000000000000 --- a/docs/painless/painless-lang-spec/painless-functions.asciidoc +++ /dev/null @@ -1,24 +0,0 @@ -[[painless-functions]] -=== Functions - -A function is a named piece of code comprised of one-to-many statements to -perform a specific task. A function is called multiple times in a single script -to repeat its specific task. A parameter is a named type value available as a -<> within the statement(s) of a function. A -function specifies zero-to-many parameters, and when a function is called a -value is specified per parameter. An argument is a value passed into a function -at the point of call. A function specifies a return type value, though if the -type is <> then no value is returned. 
Any non-void type return -value is available for use within an <> or is -discarded otherwise. - -You can declare functions at the beginning of a Painless script, for example: - -[source,painless] ---------------------------------------------------------- -boolean isNegative(def x) { x < 0 } -... -if (isNegative(someVar)) { - ... -} ---------------------------------------------------------- \ No newline at end of file diff --git a/docs/painless/painless-lang-spec/painless-identifiers.asciidoc b/docs/painless/painless-lang-spec/painless-identifiers.asciidoc deleted file mode 100644 index d2678b528ea51..0000000000000 --- a/docs/painless/painless-lang-spec/painless-identifiers.asciidoc +++ /dev/null @@ -1,33 +0,0 @@ -[[painless-identifiers]] -=== Identifiers - -Use an identifier as a named token to specify a -<>, <>, -<>, <>, or -<>. - -*Errors* - -If a <> is used as an identifier. - -*Grammar* -[source,ANTLR4] ----- -ID: [_a-zA-Z] [_a-zA-Z-0-9]*; ----- - -*Examples* - -* Variations of identifiers. -+ -[source,Painless] ----- -a -Z -id -list -list0 -MAP25 -_map25 -Map_25 ----- diff --git a/docs/painless/painless-lang-spec/painless-keywords.asciidoc b/docs/painless/painless-lang-spec/painless-keywords.asciidoc deleted file mode 100644 index 24371d3713c0b..0000000000000 --- a/docs/painless/painless-lang-spec/painless-keywords.asciidoc +++ /dev/null @@ -1,17 +0,0 @@ -[[painless-keywords]] -=== Keywords - -Keywords are reserved tokens for built-in language features. - -*Errors* - -* If a keyword is used as an <>. - -*Keywords* - -[cols="^1,^1,^1,^1,^1"] -|==== -| if | else | while | do | for -| in | continue | break | return | new -| try | catch | throw | this | instanceof -|==== diff --git a/docs/painless/painless-lang-spec/painless-lambdas.asciidoc b/docs/painless/painless-lang-spec/painless-lambdas.asciidoc deleted file mode 100644 index e6694229a0cef..0000000000000 --- a/docs/painless/painless-lang-spec/painless-lambdas.asciidoc +++ /dev/null @@ -1,15 +0,0 @@ -[[painless-lambdas]] -=== Lambdas -Lambda expressions and method references work the same as in https://docs.oracle.com/javase/tutorial/java/javaOO/lambdaexpressions.html[Java]. - -[source,painless] ---------------------------------------------------------- -list.removeIf(item -> item == 2); -list.removeIf((int item) -> item == 2); -list.removeIf((int item) -> { item == 2 }); -list.sort((x, y) -> x - y); -list.sort(Integer::compare); ---------------------------------------------------------- - -You can make method references to functions within the script with `this`, -for example `list.sort(this::mycompare)`. \ No newline at end of file diff --git a/docs/painless/painless-lang-spec/painless-literals.asciidoc b/docs/painless/painless-lang-spec/painless-literals.asciidoc deleted file mode 100644 index 99c93dc858ad8..0000000000000 --- a/docs/painless/painless-lang-spec/painless-literals.asciidoc +++ /dev/null @@ -1,125 +0,0 @@ -[[painless-literals]] -=== Literals - -Use a literal to specify a value directly in an -<>. - -[[integer-literals]] -==== Integers - -Use an integer literal to specify an integer type value in decimal, octal, or -hex notation of a <> `int`, `long`, `float`, -or `double`. Use the following single letter designations to specify the -primitive type: `l` or `L` for `long`, `f` or `F` for `float`, and `d` or `D` -for `double`. If not specified, the type defaults to `int`. Use `0` as a prefix -to specify an integer literal as octal, and use `0x` or `0X` as a prefix to -specify an integer literal as hex. 
- -*Grammar* - -[source,ANTLR4] ----- -INTEGER: '-'? ( '0' | [1-9] [0-9]* ) [lLfFdD]?; -OCTAL: '-'? '0' [0-7]+ [lL]?; -HEX: '-'? '0' [xX] [0-9a-fA-F]+ [lL]?; ----- - -*Examples* - -* Integer literals. -+ -[source,Painless] ----- -0 <1> -0D <2> -1234L <3> --90f <4> --022 <5> -0xF2A <6> ----- -+ -<1> `int 0` -<2> `double 0.0` -<3> `long 1234` -<4> `float -90.0` -<5> `int -18` in octal -<6> `int 3882` in hex - -[[float-literals]] -==== Floats - -Use a floating point literal to specify a floating point type value of a -<> `float` or `double`. Use the following -single letter designations to specify the primitive type: `f` or `F` for `float` -and `d` or `D` for `double`. If not specified, the type defaults to `double`. - -*Grammar* - -[source,ANTLR4] ----- -DECIMAL: '-'? ( '0' | [1-9] [0-9]* ) (DOT [0-9]+)? EXPONENT? [fFdD]?; -EXPONENT: ( [eE] [+\-]? [0-9]+ ); ----- - -*Examples* - -* Floating point literals. -+ -[source,Painless] ----- -0.0 <1> -1E6 <2> -0.977777 <3> --126.34 <4> -89.9F <5> ----- -+ -<1> `double 0.0` -<2> `double 1000000.0` in exponent notation -<3> `double 0.977777` -<4> `double -126.34` -<5> `float 89.9` - -[[string-literals]] -==== Strings - -Use a string literal to specify a <> value with -either single-quotes or double-quotes. Use a `\"` token to include a -double-quote as part of a double-quoted string literal. Use a `\'` token to -include a single-quote as part of a single-quoted string literal. Use a `\\` -token to include a backslash as part of any string literal. - -*Grammar* - -[source,ANTLR4] ----- -STRING: ( '"' ( '\\"' | '\\\\' | ~[\\"] )*? '"' ) - | ( '\'' ( '\\\'' | '\\\\' | ~[\\'] )*? '\'' ); ----- - -*Examples* - -* String literals using single-quotes. -+ -[source,Painless] ----- -'single-quoted string literal' -'\'single-quoted with escaped single-quotes\' and backslash \\' -'single-quoted with non-escaped "double-quotes"' ----- -+ -* String literals using double-quotes. -+ -[source,Painless] ----- -"double-quoted string literal" -"\"double-quoted with escaped double-quotes\" and backslash: \\" -"double-quoted with non-escaped 'single-quotes'" ----- - -[[character-literals]] -==== Characters - -Character literals are not specified directly. Instead, use the -<> to convert a `String` type value -into a `char` type value. diff --git a/docs/painless/painless-lang-spec/painless-operators-array.asciidoc b/docs/painless/painless-lang-spec/painless-operators-array.asciidoc deleted file mode 100644 index ad23a980cb4b8..0000000000000 --- a/docs/painless/painless-lang-spec/painless-operators-array.asciidoc +++ /dev/null @@ -1,294 +0,0 @@ -[[painless-operators-array]] -=== Operators: Array - -[[array-initialization-operator]] -==== Array Initialization - -Use the `array initialization operator '[] {}'` to allocate a single-dimensional -<> instance to the heap with a set of pre-defined -elements. Each value used to initialize an element in the array type instance is -cast to the specified element type value upon insertion. The order of specified -values is maintained. - -*Errors* - -* If a value is not castable to the specified type value. - -*Grammar* - -[source,ANTLR4] ----- -array_initialization: 'new' TYPE '[' ']' '{' expression_list '}' - | 'new' TYPE '[' ']' '{' '}'; -expression_list: expression (',' expression); ----- - -*Example:* - -* Array initialization with static values. 
-+ -[source,Painless] ----- -int[] x = new int[] {1, 2, 3}; <1> ----- -+ -<1> declare `int[] x`; - allocate `1-d int array` instance with `length [3]` - -> `1-d int array reference`; - store `int 1` to `index [0]` of `1-d int array reference`; - store `int 2` to `index [1]` of `1-d int array reference`; - store `int 3` to `index [2]` of `1-d int array reference`; - store `1-d int array reference` to `x`; -+ -* Array initialization with non-static values. -+ -[source,Painless] ----- -int i = 1; <1> -long l = 2L; <2> -float f = 3.0F; <3> -double d = 4.0; <4> -String s = "5"; <5> -def array = new def[] {i, l, f*d, s}; <6> ----- -+ -<1> declare `int i`; - store `int 1` to `i` -<2> declare `long l`; - store `long 2` to `l` -<3> declare `float f`; - store `float 3.0` to `f` -<4> declare `double d`; - store `double 4.0` to `d` -<5> declare `String s`; - store `String "5"` to `s` -<6> declare `def array`; - allocate `1-d def array` instance with `length [4]` - -> `1-d def array reference`; - load from `i` -> `int 1`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `index [0]` of `1-d def array reference`; - load from `l` -> `long 2`; - implicit cast `long 2` to `def` -> `def`; - store `def` to `index [1]` of `1-d def array reference`; - load from `f` -> `float 3.0`; - load from `d` -> `double 4.0`; - promote `float 3.0` and `double 4.0`: result `double`; - implicit cast `float 3.0` to `double 3.0` -> `double 3.0`; - multiply `double 3.0` and `double 4.0` -> `double 12.0`; - implicit cast `double 12.0` to `def` -> `def`; - store `def` to `index [2]` of `1-d def array reference`; - load from `s` -> `String "5"`; - implicit cast `String "5"` to `def` -> `def`; - store `def` to `index [3]` of `1-d def array reference`; - implicit cast `1-d int array reference` to `def` -> `def`; - store `def` to `array` - -[[array-access-operator]] -==== Array Access - -Use the `array access operator '[]'` to store a value to or load a value from -an <> value. Each element of an array type value is -accessed with an `int` type value to specify the index to store/load. The range -of elements within an array that are accessible is `[0, size)` where size is the -number of elements specified at the time of allocation. Use a negative `int` -type value as an index to access an element in reverse from the end of an array -type value within a range of `[-size, -1]`. - -*Errors* - -* If a value other than an `int` type value or a value that is castable to an - `int` type value is provided as an index. -* If an element is accessed outside of the valid ranges. - -*Grammar* - -[source,ANTLR4] ----- -brace_access: '[' expression ']' ----- - -*Examples* - -* Array access with a single-dimensional array. 
-+ -[source,Painless] ----- -int[] x = new int[2]; <1> -x[0] = 2; <2> -x[1] = 5; <3> -int y = x[0] + x[1]; <4> -int z = 1; <5> -int i = x[z]; <6> ----- -+ -<1> declare `int[] x`; - allocate `1-d int array` instance with `length [2]` - -> `1-d int array reference`; - store `1-d int array reference` to `x` -<2> load from `x` -> `1-d int array reference`; - store `int 2` to `index [0]` of `1-d int array reference`; -<3> load from `x` -> `1-d int array reference`; - store `int 5` to `index [1]` of `1-d int array reference`; -<4> declare `int y`; - load from `x` -> `1-d int array reference`; - load from `index [0]` of `1-d int array reference` -> `int 2`; - load from `x` -> `1-d int array reference`; - load from `index [1]` of `1-d int array reference` -> `int 5`; - add `int 2` and `int 5` -> `int 7`; - store `int 7` to `y` -<5> declare `int z`; - store `int 1` to `z`; -<6> declare `int i`; - load from `x` -> `1-d int array reference`; - load from `z` -> `int 1`; - load from `index [1]` of `1-d int array reference` -> `int 5`; - store `int 5` to `i`; -+ -* Array access with the `def` type. -+ -[source,Painless] ----- -def d = new int[2]; <1> -d[0] = 2; <2> -d[1] = 5; <3> -def x = d[0] + d[1]; <4> -def y = 1; <5> -def z = d[y]; <6> ----- -+ -<1> declare `def d`; - allocate `1-d int array` instance with `length [2]` - -> `1-d int array reference`; - implicit cast `1-d int array reference` to `def` -> `def`; - store `def` to `d` -<2> load from `d` -> `def` - implicit cast `def` to `1-d int array reference` - -> `1-d int array reference`; - store `int 2` to `index [0]` of `1-d int array reference`; -<3> load from `d` -> `def` - implicit cast `def` to `1-d int array reference` - -> `1-d int array reference`; - store `int 5` to `index [1]` of `1-d int array reference`; -<4> declare `int x`; - load from `d` -> `def` - implicit cast `def` to `1-d int array reference` - -> `1-d int array reference`; - load from `index [0]` of `1-d int array reference` -> `int 2`; - load from `d` -> `def` - implicit cast `def` to `1-d int array reference` - -> `1-d int array reference`; - load from `index [1]` of `1-d int array reference` -> `int 5`; - add `int 2` and `int 5` -> `int 7`; - implicit cast `int 7` to `def` -> `def`; - store `def` to `x` -<5> declare `def y`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `y`; -<6> declare `int i`; - load from `d` -> `def` - implicit cast `def` to `1-d int array reference` - -> `1-d int array reference`; - load from `y` -> `def`; - implicit cast `def` to `int 1` -> `int 1`; - load from `index [1]` of `1-d int array reference` -> `int 5`; - implicit cast `int 5` to `def`; - store `def` to `z`; -+ -* Array access with a multi-dimensional array. -+ -[source,Painless] ----- -int[][][] ia3 = new int[2][3][4]; <1> -ia3[1][2][3] = 99; <2> -int i = ia3[1][2][3]; <3> ----- -+ -<1> declare `int[][][] ia`; - allocate `3-d int array` instance with length `[2, 3, 4]` - -> `3-d int array reference`; - store `3-d int array reference` to `ia3` -<2> load from `ia3` -> `3-d int array reference`; - store `int 99` to `index [1, 2, 3]` of `3-d int array reference` -<3> declare `int i`; - load from `ia3` -> `3-d int array reference`; - load from `index [1, 2, 3]` of `3-d int array reference` -> `int 99`; - store `int 99` to `i` - -[[array-length-operator]] -==== Array Length - -An array type value contains a read-only member field named `length`. 
The -`length` field stores the size of the array as an `int` type value where size is -the number of elements specified at the time of allocation. Use the -<> to load the field `length` -from an array type value. - -*Examples* - -* Access the `length` field. -+ -[source,Painless] ----- -int[] x = new int[10]; <1> -int l = x.length; <2> ----- -<1> declare `int[] x`; - allocate `1-d int array` instance with `length [2]` - -> `1-d int array reference`; - store `1-d int array reference` to `x` -<2> declare `int l`; - load `x` -> `1-d int array reference`; - load `length` from `1-d int array reference` -> `int 10`; - store `int 10` to `l`; - -[[new-array-operator]] -==== New Array - -Use the `new array operator 'new []'` to allocate an array type instance to -the heap. Specify the element type following the `new` token. Specify each -dimension with the `[` and `]` tokens following the element type name. The size -of each dimension is specified by an `int` type value in between each set of `[` -and `]` tokens. - -*Errors* - -* If a value other than an `int` type value or a value that is castable to an - `int` type value is specified for a dimension's size. - -*Grammar* - -[source,ANTLR4] ----- -new_array: 'new' TYPE ('[' expression ']')+; ----- - -*Examples* - -* Allocation of different array types. -+ -[source,Painless] ----- -int[] x = new int[5]; <1> -x = new int[10]; <2> -int y = 2; <3> -def z = new def[y][y*2]; <4> ----- -+ -<1> declare `int[] x`; - allocate `1-d int array` instance with `length [5]` - -> `1-d int array reference`; - store `1-d int array reference` to `x` -<2> allocate `1-d int array` instance with `length [10]` - -> `1-d int array reference`; - store `1-d int array reference` to `x` -<3> declare `int y`; - store `int 2` to `y`; -<4> declare `def z`; - load from `y` -> `int 2 @0`; - load from `y` -> `int 2 @1`; - multiply `int 2 @1` by `int 2 @2` -> `int 4`; - allocate `2-d int array` instance with length `[2, 4]` - -> `2-d int array reference`; - implicit cast `2-d int array reference` to `def` -> `def`; - store `def` to `z`; diff --git a/docs/painless/painless-lang-spec/painless-operators-boolean.asciidoc b/docs/painless/painless-lang-spec/painless-operators-boolean.asciidoc deleted file mode 100644 index 6f9481aa4ecd3..0000000000000 --- a/docs/painless/painless-lang-spec/painless-operators-boolean.asciidoc +++ /dev/null @@ -1,1420 +0,0 @@ -[[painless-operators-boolean]] -=== Operators: Boolean - -[[boolean-not-operator]] -==== Boolean Not - -Use the `boolean not operator '!'` to NOT a `boolean` type value where `true` is -flipped to `false` and `false` is flipped to `true`. - -*Errors* - -* If a value other than a `boolean` type value or a value that is castable to a - `boolean` type value is given. - -*Truth* - -[options="header",cols="<1,<1"] -|==== -| original | result -| true | false -| false | true -|==== - -*Grammar* - -[source,ANTLR4] ----- -boolean_not: '!' expression; ----- - -*Examples* - -* Boolean not with the `boolean` type. -+ -[source,Painless] ----- -boolean x = !false; <1> -boolean y = !x; <2> ----- -<1> declare `boolean x`; - boolean not `boolean false` -> `boolean true`; - store `boolean true` to `x` -<2> declare `boolean y`; - load from `x` -> `boolean true`; - boolean not `boolean true` -> `boolean false`; - store `boolean false` to `y` -+ -* Boolean not with the `def` type. 
-+ -[source,Painless] ----- -def y = true; <1> -def z = !y; <2> ----- -+ -<1> declare `def y`; - implicit cast `boolean true` to `def` -> `def`; - store `true` to `y` -<2> declare `def z`; - load from `y` -> `def`; - implicit cast `def` to `boolean true` -> boolean `true`; - boolean not `boolean true` -> `boolean false`; - implicit cast `boolean false` to `def` -> `def`; - store `def` to `z` - -[[greater-than-operator]] -==== Greater Than - -Use the `greater than operator '>'` to COMPARE two numeric type values where a -resultant `boolean` type value is `true` if the left-hand side value is greater -than to the right-hand side value and `false` otherwise. - -*Errors* - -* If either the evaluated left-hand side or the evaluated right-hand side is a - non-numeric value. - -*Grammar* - -[source,ANTLR4] ----- -greater_than: expression '>' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | byte | short | char | int | long | float | double | def -| byte | int | int | int | int | long | float | double | def -| short | int | int | int | int | long | float | double | def -| char | int | int | int | int | long | float | double | def -| int | int | int | int | int | long | float | double | def -| long | long | long | long | long | long | float | double | def -| float | float | float | float | float | float | float | double | def -| double | double | double | double | double | double | double | double | def -| def | def | def | def | def | def | def | def | def -|==== - -*Examples* - -* Greater than with different numeric types. -+ -[source,Painless] ----- -boolean x = 5 > 4; <1> -double y = 6.0; <2> -x = 6 > y; <3> ----- -+ -<1> declare `boolean x`; - greater than `int 5` and `int 4` -> `boolean true`; - store `boolean true` to `x`; -<2> declare `double y`; - store `double 6.0` to `y`; -<3> load from `y` -> `double 6.0 @0`; - promote `int 6` and `double 6.0`: result `double`; - implicit cast `int 6` to `double 6.0 @1` -> `double 6.0 @1`; - greater than `double 6.0 @1` and `double 6.0 @0` -> `boolean false`; - store `boolean false` to `x` -+ -* Greater than with `def` type. -+ -[source,Painless] ----- -int x = 5; <1> -def y = 7.0; <2> -def z = y > 6.5; <3> -def a = x > y; <4> ----- -+ -<1> declare `int x`; - store `int 5` to `x` -<2> declare `def y`; - implicit cast `double 7.0` to `def` -> `def`; - store `def` to `y` -<3> declare `def z`; - load from `y` -> `def`; - implicit cast `def` to `double 7.0` -> `double 7.0`; - greater than `double 7.0` and `double 6.5` -> `boolean true`; - implicit cast `boolean true` to `def` -> `def`; - store `def` to `z` -<4> declare `def a`; - load from `y` -> `def`; - implicit cast `def` to `double 7.0` -> `double 7.0`; - load from `x` -> `int 5`; - promote `int 5` and `double 7.0`: result `double`; - implicit cast `int 5` to `double 5.0` -> `double 5.0`; - greater than `double 5.0` and `double 7.0` -> `boolean false`; - implicit cast `boolean false` to `def` -> `def`; - store `def` to `z` - -[[greater-than-or-equal-operator]] -==== Greater Than Or Equal - -Use the `greater than or equal operator '>='` to COMPARE two numeric type values -where a resultant `boolean` type value is `true` if the left-hand side value is -greater than or equal to the right-hand side value and `false` otherwise. - -*Errors* - -* If either the evaluated left-hand side or the evaluated right-hand side is a - non-numeric value. 
- -*Grammar* - -[source,ANTLR4] ----- -greater_than_or_equal: expression '>=' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | byte | short | char | int | long | float | double | def -| byte | int | int | int | int | long | float | double | def -| short | int | int | int | int | long | float | double | def -| char | int | int | int | int | long | float | double | def -| int | int | int | int | int | long | float | double | def -| long | long | long | long | long | long | float | double | def -| float | float | float | float | float | float | float | double | def -| double | double | double | double | double | double | double | double | def -| def | def | def | def | def | def | def | def | def -|==== - -*Examples* - -* Greater than or equal with different numeric types. -+ -[source,Painless] ----- -boolean x = 5 >= 4; <1> -double y = 6.0; <2> -x = 6 >= y; <3> ----- -+ -<1> declare `boolean x`; - greater than or equal `int 5` and `int 4` -> `boolean true`; - store `boolean true` to `x` -<2> declare `double y`; - store `double 6.0` to `y` -<3> load from `y` -> `double 6.0 @0`; - promote `int 6` and `double 6.0`: result `double`; - implicit cast `int 6` to `double 6.0 @1` -> `double 6.0 @1`; - greater than or equal `double 6.0 @1` and `double 6.0 @0` -> `boolean true`; - store `boolean true` to `x` -+ -* Greater than or equal with the `def` type. -+ -[source,Painless] ----- -int x = 5; <1> -def y = 7.0; <2> -def z = y >= 7.0; <3> -def a = x >= y; <4> ----- -+ -<1> declare `int x`; - store `int 5` to `x`; -<2> declare `def y` - implicit cast `double 7.0` to `def` -> `def`; - store `def` to `y` -<3> declare `def z`; - load from `y` -> `def`; - implicit cast `def` to `double 7.0 @0` -> `double 7.0 @0`; - greater than or equal `double 7.0 @0` and `double 7.0 @1` -> `boolean true`; - implicit cast `boolean true` to `def` -> `def`; - store `def` to `z` -<4> declare `def a`; - load from `y` -> `def`; - implicit cast `def` to `double 7.0` -> `double 7.0`; - load from `x` -> `int 5`; - promote `int 5` and `double 7.0`: result `double`; - implicit cast `int 5` to `double 5.0` -> `double 5.0`; - greater than or equal `double 5.0` and `double 7.0` -> `boolean false`; - implicit cast `boolean false` to `def` -> `def`; - store `def` to `z` - -[[less-than-operator]] -==== Less Than - -Use the `less than operator '<'` to COMPARE two numeric type values where a -resultant `boolean` type value is `true` if the left-hand side value is less -than to the right-hand side value and `false` otherwise. - -*Errors* - -* If either the evaluated left-hand side or the evaluated right-hand side is a - non-numeric value. - -*Grammar* - -[source,ANTLR4] ----- -less_than: expression '<' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | byte | short | char | int | long | float | double | def -| byte | int | int | int | int | long | float | double | def -| short | int | int | int | int | long | float | double | def -| char | int | int | int | int | long | float | double | def -| int | int | int | int | int | long | float | double | def -| long | long | long | long | long | long | float | double | def -| float | float | float | float | float | float | float | double | def -| double | double | double | double | double | double | double | double | def -| def | def | def | def | def | def | def | def | def -|==== - -*Examples* - -* Less than with different numeric types. 
-+ -[source,Painless] ----- -boolean x = 5 < 4; <1> -double y = 6.0; <2> -x = 6 < y; <3> ----- -+ -<1> declare `boolean x`; - less than `int 5` and `int 4` -> `boolean false`; - store `boolean false` to `x` -<2> declare `double y`; - store `double 6.0` to `y` -<3> load from `y` -> `double 6.0 @0`; - promote `int 6` and `double 6.0`: result `double`; - implicit cast `int 6` to `double 6.0 @1` -> `double 6.0 @1`; - less than `double 6.0 @1` and `double 6.0 @0` -> `boolean false`; - store `boolean false` to `x` -+ -* Less than with the `def` type. -+ -[source,Painless] ----- -int x = 5; <1> -def y = 7.0; <2> -def z = y < 6.5; <3> -def a = x < y; <4> ----- -+ -<1> declare `int x`; - store `int 5` to `x` -<2> declare `def y`; - implicit cast `double 7.0` to `def` -> `def`; - store `def` to `y` -<3> declare `def z`; - load from `y` -> `def`; - implicit cast `def` to `double 7.0` -> `double 7.0`; - less than `double 7.0` and `double 6.5` -> `boolean false`; - implicit cast `boolean false` to `def` -> `def`; - store `def` to `z` -<4> declare `def a`; - load from `y` -> `def`; - implicit cast `def` to `double 7.0` -> `double 7.0`; - load from `x` -> `int 5`; - promote `int 5` and `double 7.0`: result `double`; - implicit cast `int 5` to `double 5.0` -> `double 5.0`; - less than `double 5.0` and `double 7.0` -> `boolean true`; - implicit cast `boolean true` to `def` -> `def`; - store `def` to `z` - -[[less-than-or-equal-operator]] -==== Less Than Or Equal - -Use the `less than or equal operator '<='` to COMPARE two numeric type values -where a resultant `boolean` type value is `true` if the left-hand side value is -less than or equal to the right-hand side value and `false` otherwise. - -*Errors* - -* If either the evaluated left-hand side or the evaluated right-hand side is a - non-numeric value. - -*Grammar* - -[source,ANTLR4] ----- -greater_than_or_equal: expression '<=' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | byte | short | char | int | long | float | double | def -| byte | int | int | int | int | long | float | double | def -| short | int | int | int | int | long | float | double | def -| char | int | int | int | int | long | float | double | def -| int | int | int | int | int | long | float | double | def -| long | long | long | long | long | long | float | double | def -| float | float | float | float | float | float | float | double | def -| double | double | double | double | double | double | double | double | def -| def | def | def | def | def | def | def | def | def -|==== - -*Examples* - -* Less than or equal with different numeric types. -+ -[source,Painless] ----- -boolean x = 5 <= 4; <1> -double y = 6.0; <2> -x = 6 <= y; <3> ----- -+ -<1> declare `boolean x`; - less than or equal `int 5` and `int 4` -> `boolean false`; - store `boolean true` to `x` -<2> declare `double y`; - store `double 6.0` to `y` -<3> load from `y` -> `double 6.0 @0`; - promote `int 6` and `double 6.0`: result `double`; - implicit cast `int 6` to `double 6.0 @1` -> `double 6.0 @1`; - less than or equal `double 6.0 @1` and `double 6.0 @0` -> `boolean true`; - store `boolean true` to `x` -+ -* Less than or equal with the `def` type. 
-+ -[source,Painless] ----- -int x = 5; <1> -def y = 7.0; <2> -def z = y <= 7.0; <3> -def a = x <= y; <4> ----- -+ -<1> declare `int x`; - store `int 5` to `x`; -<2> declare `def y`; - implicit cast `double 7.0` to `def` -> `def`; - store `def` to `y`; -<3> declare `def z`; - load from `y` -> `def`; - implicit cast `def` to `double 7.0 @0` -> `double 7.0 @0`; - less than or equal `double 7.0 @0` and `double 7.0 @1` -> `boolean true`; - implicit cast `boolean true` to `def` -> `def`; - store `def` to `z` -<4> declare `def a`; - load from `y` -> `def`; - implicit cast `def` to `double 7.0` -> `double 7.0`; - load from `x` -> `int 5`; - promote `int 5` and `double 7.0`: result `double`; - implicit cast `int 5` to `double 5.0` -> `double 5.0`; - less than or equal `double 5.0` and `double 7.0` -> `boolean true`; - implicit cast `boolean true` to `def` -> `def`; - store `def` to `z` - -[[instanceof-operator]] -==== Instanceof - -Use the `instanceof operator` to COMPARE the variable/field type to a -specified reference type using the reference type name where a resultant -`boolean` type value is `true` if the variable/field type is the same as or a -descendant of the specified reference type and false otherwise. - -*Errors* - -* If the reference type name doesn't exist as specified by the right-hand side. - -*Grammar* - -[source,ANTLR4] ----- -instance_of: ID 'instanceof' TYPE; ----- - -*Examples* - -* Instance of with different reference types. -+ -[source,Painless] ----- -Map m = new HashMap(); <1> -boolean a = m instanceof HashMap; <2> -boolean b = m instanceof Map; <3> ----- -+ -<1> declare `Map m`; - allocate `HashMap` instance -> `HashMap reference`; - implicit cast `HashMap reference` to `Map reference`; - store `Map reference` to `m` -<2> declare `boolean a`; - load from `m` -> `Map reference`; - implicit cast `Map reference` to `HashMap reference` -> `HashMap reference`; - instanceof `HashMap reference` and `HashMap` -> `boolean true`; - store `boolean true` to `a` -<3> declare `boolean b`; - load from `m` -> `Map reference`; - implicit cast `Map reference` to `HashMap reference` -> `HashMap reference`; - instanceof `HashMap reference` and `Map` -> `boolean true`; - store `true` to `b`; - (note `HashMap` is a descendant of `Map`) -+ -* Instance of with the `def` type. -+ -[source,Painless] ----- -def d = new ArrayList(); <1> -boolean a = d instanceof List; <2> -boolean b = d instanceof Map; <3> ----- -+ -<1> declare `def d`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `def` -> `def`; - store `def` to `d` -<2> declare `boolean a`; - load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - instanceof `ArrayList reference` and `List` -> `boolean true`; - store `boolean true` to `a`; - (note `ArrayList` is a descendant of `List`) -<3> declare `boolean b`; - load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - instanceof `ArrayList reference` and `Map` -> `boolean false`; - store `boolean false` to `a`; - (note `ArrayList` is not a descendant of `Map`) - -[[equality-equals-operator]] -==== Equality Equals - -Use the `equality equals operator '=='` to COMPARE two values where a resultant -`boolean` type value is `true` if the two values are equal and `false` -otherwise. The member method, `equals`, is implicitly called when the values are -reference type values where the first value is the target of the call and the -second value is the argument. 
This operation is null-safe where if both values -are `null` the resultant `boolean` type value is `true`, and if only one value -is `null` the resultant `boolean` type value is `false`. A valid comparison is -between `boolean` type values, numeric type values, or reference type values. - -*Errors* - -* If a comparison is made between a `boolean` type value and numeric type value. -* If a comparison is made between a primitive type value and a reference type - value. - -*Grammar* - -[source,ANTLR4] ----- -equality_equals: expression '==' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | boolean | byte | short | char | int | long | float | double | Reference | def -| boolean | boolean | - | - | - | - | - | - | - | - | def -| byte | - | int | int | int | int | long | float | double | - | def -| short | - | int | int | int | int | long | float | double | - | def -| char | - | int | int | int | int | long | float | double | - | def -| int | - | int | int | int | int | long | float | double | - | def -| long | - | long | long | long | long | long | float | double | - | def -| float | - | float | float | float | float | float | float | double | - | def -| double | - | double | double | double | double | double | double | double | - | def -| Reference | - | - | - | - | - | - | - | - | Object | def -| def | def | def | def | def | def | def | def | def | def | def -|==== - -*Examples* - -* Equality equals with the `boolean` type. -+ -[source,Painless] ----- -boolean a = true; <1> -boolean b = false; <2> -a = a == false; <3> -b = a == b; <4> ----- -+ -<1> declare `boolean a`; - store `boolean true` to `a` -<2> declare `boolean b`; - store `boolean false` to `b` -<3> load from `a` -> `boolean true`; - equality equals `boolean true` and `boolean false` -> `boolean false`; - store `boolean false` to `a` -<4> load from `a` -> `boolean false @0`; - load from `b` -> `boolean false @1`; - equality equals `boolean false @0` and `boolean false @1` - -> `boolean false`; - store `boolean false` to `b` -+ -* Equality equals with primitive types. -+ -[source,Painless] ----- -int a = 1; <1> -double b = 2.0; <2> -boolean c = a == b; <3> -c = 1 == a; <4> ----- -+ -<1> declare `int a`; - store `int 1` to `a` -<2> declare `double b`; - store `double 1.0` to `b` -<3> declare `boolean c`; - load from `a` -> `int 1`; - load from `b` -> `double 2.0`; - promote `int 1` and `double 2.0`: result `double`; - implicit cast `int 1` to `double 1.0` -> `double `1.0`; - equality equals `double 1.0` and `double 2.0` -> `boolean false`; - store `boolean false` to `c` -<4> load from `a` -> `int 1 @1`; - equality equals `int 1 @0` and `int 1 @1` -> `boolean true`; - store `boolean true` to `c` -+ -* Equal equals with reference types. 
-+ -[source,Painless] ----- -List a = new ArrayList(); <1> -List b = new ArrayList(); <2> -a.add(1); <3> -boolean c = a == b; <4> -b.add(1); <5> -c = a == b; <6> ----- -+ -<1> declare `List a`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `a` -<2> declare `List b`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `b` -<3> load from `a` -> `List reference`; - call `add` on `List reference` with arguments (`int 1)` -<4> declare `boolean c`; - load from `a` -> `List reference @0`; - load from `b` -> `List reference @1`; - call `equals` on `List reference @0` with arguments (`List reference @1`) - -> `boolean false`; - store `boolean false` to `c` -<5> load from `b` -> `List reference`; - call `add` on `List reference` with arguments (`int 1`) -<6> load from `a` -> `List reference @0`; - load from `b` -> `List reference @1`; - call `equals` on `List reference @0` with arguments (`List reference @1`) - -> `boolean true`; - store `boolean true` to `c` -+ -* Equality equals with `null`. -+ -[source,Painless] ----- -Object a = null; <1> -Object b = null; <2> -boolean c = a == null; <3> -c = a == b; <4> -b = new Object(); <5> -c = a == b; <6> ----- -+ -<1> declare `Object a`; - store `null` to `a` -<2> declare `Object b`; - store `null` to `b` -<3> declare `boolean c`; - load from `a` -> `null @0`; - equality equals `null @0` and `null @1` -> `boolean true`; - store `boolean true` to `c` -<4> load from `a` -> `null @0`; - load from `b` -> `null @1`; - equality equals `null @0` and `null @1` -> `boolean true`; - store `boolean true` to `c` -<5> allocate `Object` instance -> `Object reference`; - store `Object reference` to `b` -<6> load from `a` -> `Object reference`; - load from `b` -> `null`; - call `equals` on `Object reference` with arguments (`null`) - -> `boolean false`; - store `boolean false` to `c` -+ -* Equality equals with the `def` type. 
-+ -[source, Painless] ----- -def a = 0; <1> -def b = 1; <2> -boolean c = a == b; <3> -def d = new HashMap(); <4> -def e = new ArrayList(); <5> -c = d == e; <6> ----- -+ -<1> declare `def a`; - implicit cast `int 0` to `def` -> `def`; - store `def` to `a`; -<2> declare `def b`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `b`; -<3> declare `boolean c`; - load from `a` -> `def`; - implicit cast `a` to `int 0` -> `int 0`; - load from `b` -> `def`; - implicit cast `b` to `int 1` -> `int 1`; - equality equals `int 0` and `int 1` -> `boolean false`; - store `boolean false` to `c` -<4> declare `def d`; - allocate `HashMap` instance -> `HashMap reference`; - implicit cast `HashMap reference` to `def` -> `def` - store `def` to `d`; -<5> declare `def e`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `def` -> `def` - store `def` to `d`; -<6> load from `d` -> `def`; - implicit cast `def` to `HashMap reference` -> `HashMap reference`; - load from `e` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - call `equals` on `HashMap reference` with arguments (`ArrayList reference`) - -> `boolean false`; - store `boolean false` to `c` - -[[equality-not-equals-operator]] -==== Equality Not Equals - -Use the `equality not equals operator '!='` to COMPARE two values where a -resultant `boolean` type value is `true` if the two values are NOT equal and -`false` otherwise. The member method, `equals`, is implicitly called when the -values are reference type values where the first value is the target of the call -and the second value is the argument with the resultant `boolean` type value -flipped. This operation is `null-safe` where if both values are `null` the -resultant `boolean` type value is `false`, and if only one value is `null` the -resultant `boolean` type value is `true`. A valid comparison is between boolean -type values, numeric type values, or reference type values. - -*Errors* - -* If a comparison is made between a `boolean` type value and numeric type value. -* If a comparison is made between a primitive type value and a reference type - value. - -*Grammar* - -[source,ANTLR4] ----- -equality_not_equals: expression '!=' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | boolean | byte | short | char | int | long | float | double | Reference | def -| boolean | boolean | - | - | - | - | - | - | - | - | def -| byte | - | int | int | int | int | long | float | double | - | def -| short | - | int | int | int | int | long | float | double | - | def -| char | - | int | int | int | int | long | float | double | - | def -| int | - | int | int | int | int | long | float | double | - | def -| long | - | long | long | long | long | long | float | double | - | def -| float | - | float | float | float | float | float | float | double | - | def -| double | - | double | double | double | double | double | double | double | - | def -| Reference | - | - | - | - | - | - | - | - | Object | def -| def | def | def | def | def | def | def | def | def | def | def -|==== - -*Examples* - -* Equality not equals with the `boolean` type. 
-+ -[source,Painless] ----- -boolean a = true; <1> -boolean b = false; <2> -a = a != false; <3> -b = a != b; <4> ----- -+ -<1> declare `boolean a`; - store `boolean true` to `a` -<2> declare `boolean b`; - store `boolean false` to `b` -<3> load from `a` -> `boolean true`; - equality not equals `boolean true` and `boolean false` -> `boolean true`; - store `boolean true` to `a` -<4> load from `a` -> `boolean true`; - load from `b` -> `boolean false`; - equality not equals `boolean true` and `boolean false` -> `boolean true`; - store `boolean true` to `b` -+ -* Equality not equals with primitive types. -+ -[source,Painless] ----- -int a = 1; <1> -double b = 2.0; <2> -boolean c = a != b; <3> -c = 1 != a; <4> ----- -+ -<1> declare `int a`; - store `int 1` to `a` -<2> declare `double b`; - store `double 1.0` to `b` -<3> declare `boolean c`; - load from `a` -> `int 1`; - load from `b` -> `double 2.0`; - promote `int 1` and `double 2.0`: result `double`; - implicit cast `int 1` to `double 1.0` -> `double `1.0`; - equality not equals `double 1.0` and `double 2.0` -> `boolean true`; - store `boolean true` to `c` -<4> load from `a` -> `int 1 @1`; - equality not equals `int 1 @0` and `int 1 @1` -> `boolean false`; - store `boolean false` to `c` -+ -* Equality not equals with reference types. -+ -[source,Painless] ----- -List a = new ArrayList(); <1> -List b = new ArrayList(); <2> -a.add(1); <3> -boolean c = a == b; <4> -b.add(1); <5> -c = a == b; <6> ----- -+ -<1> declare `List a`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `a` -<2> declare `List b`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `b` -<3> load from `a` -> `List reference`; - call `add` on `List reference` with arguments (`int 1)` -<4> declare `boolean c`; - load from `a` -> `List reference @0`; - load from `b` -> `List reference @1`; - call `equals` on `List reference @0` with arguments (`List reference @1`) - -> `boolean false`; - boolean not `boolean false` -> `boolean true` - store `boolean true` to `c` -<5> load from `b` -> `List reference`; - call `add` on `List reference` with arguments (`int 1`) -<6> load from `a` -> `List reference @0`; - load from `b` -> `List reference @1`; - call `equals` on `List reference @0` with arguments (`List reference @1`) - -> `boolean true`; - boolean not `boolean true` -> `boolean false`; - store `boolean false` to `c` -+ -* Equality not equals with `null`. 
-+ -[source,Painless] ----- -Object a = null; <1> -Object b = null; <2> -boolean c = a == null; <3> -c = a == b; <4> -b = new Object(); <5> -c = a == b; <6> ----- -+ -<1> declare `Object a`; - store `null` to `a` -<2> declare `Object b`; - store `null` to `b` -<3> declare `boolean c`; - load from `a` -> `null @0`; - equality not equals `null @0` and `null @1` -> `boolean false`; - store `boolean false` to `c` -<4> load from `a` -> `null @0`; - load from `b` -> `null @1`; - equality not equals `null @0` and `null @1` -> `boolean false`; - store `boolean false` to `c` -<5> allocate `Object` instance -> `Object reference`; - store `Object reference` to `b` -<6> load from `a` -> `Object reference`; - load from `b` -> `null`; - call `equals` on `Object reference` with arguments (`null`) - -> `boolean false`; - boolean not `boolean false` -> `boolean true`; - store `boolean true` to `c` -+ -* Equality not equals with the `def` type. -+ -[source, Painless] ----- -def a = 0; <1> -def b = 1; <2> -boolean c = a == b; <3> -def d = new HashMap(); <4> -def e = new ArrayList(); <5> -c = d == e; <6> ----- -+ -<1> declare `def a`; - implicit cast `int 0` to `def` -> `def`; - store `def` to `a`; -<2> declare `def b`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `b`; -<3> declare `boolean c`; - load from `a` -> `def`; - implicit cast `a` to `int 0` -> `int 0`; - load from `b` -> `def`; - implicit cast `b` to `int 1` -> `int 1`; - equality equals `int 0` and `int 1` -> `boolean false`; - store `boolean false` to `c` -<4> declare `def d`; - allocate `HashMap` instance -> `HashMap reference`; - implicit cast `HashMap reference` to `def` -> `def` - store `def` to `d`; -<5> declare `def e`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `def` -> `def` - store `def` to `d`; -<6> load from `d` -> `def`; - implicit cast `def` to `HashMap reference` -> `HashMap reference`; - load from `e` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - call `equals` on `HashMap reference` with arguments (`ArrayList reference`) - -> `boolean false`; - store `boolean false` to `c` - -[[identity-equals-operator]] -==== Identity Equals - -Use the `identity equals operator '==='` to COMPARE two values where a resultant -`boolean` type value is `true` if the two values are equal and `false` -otherwise. A reference type value is equal to another reference type value if -both values refer to same instance on the heap or if both values are `null`. A -valid comparison is between `boolean` type values, numeric type values, or -reference type values. - -*Errors* - -* If a comparison is made between a `boolean` type value and numeric type value. -* If a comparison is made between a primitive type value and a reference type - value. 
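-As a minimal sketch of how this differs from the `equality equals operator '=='` (variable names here are illustrative only; the annotated walk-throughs follow under *Examples*):
-
-[source,Painless]
----
-List a = new ArrayList(); <1>
-List b = new ArrayList(); <2>
-boolean x = a == b;       <3>
-boolean y = a === b;      <4>
----
-
-<1> allocate the first `ArrayList` instance
-<2> allocate the second `ArrayList` instance
-<3> `==` calls `equals` on reference type values; two empty lists -> `boolean true`
-<4> `===` compares instances; two separate allocations -> `boolean false`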
- -*Grammar* - -[source,ANTLR4] ----- -identity_equals: expression '===' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | boolean | byte | short | char | int | long | float | double | Reference | def -| boolean | boolean | - | - | - | - | - | - | - | - | def -| byte | - | int | int | int | int | long | float | double | - | def -| short | - | int | int | int | int | long | float | double | - | def -| char | - | int | int | int | int | long | float | double | - | def -| int | - | int | int | int | int | long | float | double | - | def -| long | - | long | long | long | long | long | float | double | - | def -| float | - | float | float | float | float | float | float | double | - | def -| double | - | double | double | double | double | double | double | double | - | def -| Reference | - | - | - | - | - | - | - | - | Object | def -| def | def | def | def | def | def | def | def | def | def | def -|==== - -*Examples* - -* Identity equals with reference types. -+ -[source,Painless] ----- -List a = new ArrayList(); <1> -List b = new ArrayList(); <2> -List c = a; <3> -boolean c = a === b; <4> -c = a === c; <5> ----- -+ -<1> declare `List a`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `a` -<2> declare `List b`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `b` -<3> load from `a` -> `List reference`; - store `List reference` to `c` -<4> declare `boolean c`; - load from `a` -> `List reference @0`; - load from `b` -> `List reference @1`; - identity equals `List reference @0` and `List reference @1` - -> `boolean false` - store `boolean false` to `c` -<5> load from `a` -> `List reference @0`; - load from `c` -> `List reference @1`; - identity equals `List reference @0` and `List reference @1` - -> `boolean true` - store `boolean true` to `c` - (note `List reference @0` and `List reference @1` refer to the same - instance) -+ -* Identity equals with `null`. -+ -[source,Painless] ----- -Object a = null; <1> -Object b = null; <2> -boolean c = a === null; <3> -c = a === b; <4> -b = new Object(); <5> -c = a === b; <6> ----- -+ -<1> declare `Object a`; - store `null` to `a` -<2> declare `Object b`; - store `null` to `b` -<3> declare `boolean c`; - load from `a` -> `null @0`; - identity equals `null @0` and `null @1` -> `boolean true`; - store `boolean true` to `c` -<4> load from `a` -> `null @0`; - load from `b` -> `null @1`; - identity equals `null @0` and `null @1` -> `boolean true`; - store `boolean true` to `c` -<5> allocate `Object` instance -> `Object reference`; - store `Object reference` to `b` -<6> load from `a` -> `Object reference`; - load from `b` -> `null`; - identity equals `Object reference` and `null` -> `boolean false`; - store `boolean false` to `c` -+ -* Identity equals with the `def` type. 
-+ -[source, Painless] ----- -def a = new HashMap(); <1> -def b = new ArrayList(); <2> -boolean c = a === b; <3> -b = a; <4> -c = a === b; <5> ----- -+ -<1> declare `def d`; - allocate `HashMap` instance -> `HashMap reference`; - implicit cast `HashMap reference` to `def` -> `def` - store `def` to `d` -<2> declare `def e`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `def` -> `def` - store `def` to `d` -<3> declare `boolean c`; - load from `a` -> `def`; - implicit cast `def` to `HashMap reference` -> `HashMap reference`; - load from `b` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - identity equals `HashMap reference` and `ArrayList reference` - -> `boolean false`; - store `boolean false` to `c` -<4> load from `a` -> `def`; - store `def` to `b` -<5> load from `a` -> `def`; - implicit cast `def` to `HashMap reference @0` -> `HashMap reference @0`; - load from `b` -> `def`; - implicit cast `def` to `HashMap reference @1` -> `HashMap reference @1`; - identity equals `HashMap reference @0` and `HashMap reference @1` - -> `boolean true`; - store `boolean true` to `b`; - (note `HashMap reference @0` and `HashMap reference @1` refer to the same - instance) - -[[identity-not-equals-operator]] -==== Identity Not Equals - -Use the `identity not equals operator '!=='` to COMPARE two values where a -resultant `boolean` type value is `true` if the two values are NOT equal and -`false` otherwise. A reference type value is not equal to another reference type -value if both values refer to different instances on the heap or if one value is -`null` and the other is not. A valid comparison is between `boolean` type -values, numeric type values, or reference type values. - -*Errors* - -* If a comparison is made between a `boolean` type value and numeric type value. -* If a comparison is made between a primitive type value and a reference type - value. - -*Grammar* - -[source,ANTLR4] ----- -identity_not_equals: expression '!==' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | boolean | byte | short | char | int | long | float | double | Reference | def -| boolean | boolean | - | - | - | - | - | - | - | - | def -| byte | - | int | int | int | int | long | float | double | - | def -| short | - | int | int | int | int | long | float | double | - | def -| char | - | int | int | int | int | long | float | double | - | def -| int | - | int | int | int | int | long | float | double | - | def -| long | - | long | long | long | long | long | float | double | - | def -| float | - | float | float | float | float | float | float | double | - | def -| double | - | double | double | double | double | double | double | double | - | def -| Reference | - | - | - | - | - | - | - | - | Object | def -| def | def | def | def | def | def | def | def | def | def | def -|==== - -*Examples* - -* Identity not equals with reference type values. 
-+ -[source,Painless] ----- -List a = new ArrayList(); <1> -List b = new ArrayList(); <2> -List c = a; <3> -boolean c = a !== b; <4> -c = a !== c; <5> ----- -+ -<1> declare `List a`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `a` -<2> declare `List b`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `b` -<3> load from `a` -> `List reference`; - store `List reference` to `c` -<4> declare `boolean c`; - load from `a` -> `List reference @0`; - load from `b` -> `List reference @1`; - identity not equals `List reference @0` and `List reference @1` - -> `boolean true` - store `boolean true` to `c` -<5> load from `a` -> `List reference @0`; - load from `c` -> `List reference @1`; - identity not equals `List reference @0` and `List reference @1` - -> `boolean false` - store `boolean false` to `c` - (note `List reference @0` and `List reference @1` refer to the same - instance) -+ -* Identity not equals with `null`. -+ -[source,Painless] ----- -Object a = null; <1> -Object b = null; <2> -boolean c = a !== null; <3> -c = a !== b; <4> -b = new Object(); <5> -c = a !== b; <6> ----- -+ -<1> declare `Object a`; - store `null` to `a` -<2> declare `Object b`; - store `null` to `b` -<3> declare `boolean c`; - load from `a` -> `null @0`; - identity not equals `null @0` and `null @1` -> `boolean false`; - store `boolean false` to `c` -<4> load from `a` -> `null @0`; - load from `b` -> `null @1`; - identity not equals `null @0` and `null @1` -> `boolean false`; - store `boolean false` to `c` -<5> allocate `Object` instance -> `Object reference`; - store `Object reference` to `b` -<6> load from `a` -> `Object reference`; - load from `b` -> `null`; - identity not equals `Object reference` and `null` -> `boolean true`; - store `boolean true` to `c` -+ -* Identity not equals with the `def` type. 
-+ -[source, Painless] ----- -def a = new HashMap(); <1> -def b = new ArrayList(); <2> -boolean c = a !== b; <3> -b = a; <4> -c = a !== b; <5> ----- -+ -<1> declare `def d`; - allocate `HashMap` instance -> `HashMap reference`; - implicit cast `HashMap reference` to `def` -> `def` - store `def` to `d` -<2> declare `def e`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `def` -> `def` - store `def` to `d` -<3> declare `boolean c`; - load from `a` -> `def`; - implicit cast `def` to `HashMap reference` -> `HashMap reference`; - load from `b` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - identity not equals `HashMap reference` and `ArrayList reference` - -> `boolean true`; - store `boolean true` to `c` -<4> load from `a` -> `def`; - store `def` to `b` -<5> load from `a` -> `def`; - implicit cast `def` to `HashMap reference @0` -> `HashMap reference @0`; - load from `b` -> `def`; - implicit cast `def` to `HashMap reference @1` -> `HashMap reference @1`; - identity not equals `HashMap reference @0` and `HashMap reference @1` - -> `boolean false`; - store `boolean false` to `b`; - (note `HashMap reference @0` and `HashMap reference @1` refer to the same - instance) - -[[boolean-xor-operator]] -==== Boolean Xor - -Use the `boolean xor operator '^'` to XOR together two `boolean` type values -where if one `boolean` type value is `true` and the other is `false` the -resultant `boolean` type value is `true` and `false` otherwise. - -*Errors* - -* If either evaluated value is a value other than a `boolean` type value or - a value that is castable to a `boolean` type value. - -*Truth* - -[cols="^1,^1,^1"] -|==== -| | true | false -| true | false | true -| false | true | false -|==== - -*Grammar* - -[source,ANTLR4] ----- -boolean_xor: expression '^' expression; ----- - -*Examples* - -* Boolean xor with the `boolean` type. -+ -[source,Painless] ----- -boolean x = false; <1> -boolean y = x ^ true; <2> -y = y ^ x; <3> ----- -+ -<1> declare `boolean x`; - store `boolean false` to `x` -<2> declare `boolean y`; - load from `x` -> `boolean false` - boolean xor `boolean false` and `boolean true` -> `boolean true`; - store `boolean true` to `y` -<3> load from `y` -> `boolean true @0`; - load from `x` -> `boolean true @1`; - boolean xor `boolean true @0` and `boolean true @1` -> `boolean false`; - store `boolean false` to `y` -+ -* Boolean xor with the `def` type. -+ -[source,Painless] ----- -def x = false; <1> -def y = x ^ true; <2> -y = y ^ x; <3> ----- -+ -<1> declare `def x`; - implicit cast `boolean false` to `def` -> `def`; - store `def` to `x` -<2> declare `def y`; - load from `x` -> `def`; - implicit cast `def` to `boolean false` -> `boolean false`; - boolean xor `boolean false` and `boolean true` -> `boolean true`; - implicit cast `boolean true` to `def` -> `def`; - store `def` to `y` -<3> load from `y` -> `def`; - implicit cast `def` to `boolean true @0` -> `boolean true @0`; - load from `x` -> `def`; - implicit cast `def` to `boolean true @1` -> `boolean true @1`; - boolean xor `boolean true @0` and `boolean true @1` -> `boolean false`; - implicit cast `boolean false` -> `def`; - store `def` to `y` - -[[boolean-and-operator]] -==== Boolean And - -Use the `boolean and operator '&&'` to AND together two `boolean` type values -where if both `boolean` type values are `true` the resultant `boolean` type -value is `true` and `false` otherwise. 
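-As a minimal sketch of this truth behavior (variable names here are illustrative only; the annotated walk-throughs follow under *Examples*):
-
-[source,Painless]
----
-boolean x = true && true;  <1>
-boolean y = true && false; <2>
----
-
-<1> both values are `true` -> `boolean true`
-<2> one value is `false` -> `boolean false`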
- -*Errors* - -* If either evaluated value is a value other than a `boolean` type value or - a value that is castable to a `boolean` type value. - -*Truth* - -[cols="^1,^1,^1"] -|==== -| | true | false -| true | true | false -| false | false | false -|==== - -*Grammar* - -[source,ANTLR4] ----- -boolean_and: expression '&&' expression; ----- - -*Examples* - -* Boolean and with the `boolean` type. -+ -[source,Painless] ----- -boolean x = true; <1> -boolean y = x && true; <2> -x = false; <3> -y = y && x; <4> ----- -+ -<1> declare `boolean x`; - store `boolean true` to `x` -<2> declare `boolean y`; - load from `x` -> `boolean true @0`; - boolean and `boolean true @0` and `boolean true @1` -> `boolean true`; - store `boolean true` to `y` -<3> store `boolean false` to `x` -<4> load from `y` -> `boolean true`; - load from `x` -> `boolean false`; - boolean and `boolean true` and `boolean false` -> `boolean false`; - store `boolean false` to `y` -+ -* Boolean and with the `def` type. -+ -[source,Painless] ----- -def x = true; <1> -def y = x && true; <2> -x = false; <3> -y = y && x; <4> ----- -+ -<1> declare `def x`; - implicit cast `boolean true` to `def` -> `def`; - store `def` to `x` -<2> declare `def y`; - load from `x` -> `def`; - implicit cast `def` to `boolean true @0` -> `boolean true @0`; - boolean and `boolean true @0` and `boolean true @1` -> `boolean true`; - implicit cast `boolean true` to `def` -> `def`; - store `def` to `y` -<3> implicit cast `boolean false` to `def` -> `def`; - store `def` to `x`; -<4> load from `y` -> `def`; - implicit cast `def` to `boolean true` -> `boolean true`; - load from `x` -> `def`; - implicit cast `def` to `boolean false` -> `boolean false`; - boolean and `boolean true` and `boolean false` -> `boolean false`; - implicit cast `boolean false` -> `def`; - store `def` to `y` - -[[boolean-or-operator]] -==== Boolean Or - -Use the `boolean or operator '||'` to OR together two `boolean` type values -where if either one of the `boolean` type values is `true` the resultant -`boolean` type value is `true` and `false` otherwise. - -*Errors* - -* If either evaluated value is a value other than a `boolean` type value or - a value that is castable to a `boolean` type value. - -*Truth* - -[cols="^1,^1,^1"] -|==== -| | true | false -| true | true | true -| false | true | false -|==== - -*Grammar:* -[source,ANTLR4] ----- -boolean_and: expression '||' expression; ----- - -*Examples* - -* Boolean or with the `boolean` type. -+ -[source,Painless] ----- -boolean x = false; <1> -boolean y = x || true; <2> -y = false; <3> -y = y || x; <4> ----- -+ -<1> declare `boolean x`; - store `boolean false` to `x` -<2> declare `boolean y`; - load from `x` -> `boolean false`; - boolean or `boolean false` and `boolean true` -> `boolean true`; - store `boolean true` to `y` -<3> store `boolean false` to `y` -<4> load from `y` -> `boolean false @0`; - load from `x` -> `boolean false @1`; - boolean or `boolean false @0` and `boolean false @1` -> `boolean false`; - store `boolean false` to `y` -+ -* Boolean or with the `def` type. 
-+ -[source,Painless] ----- -def x = false; <1> -def y = x || true; <2> -y = false; <3> -y = y || x; <4> ----- -+ -<1> declare `def x`; - implicit cast `boolean false` to `def` -> `def`; - store `def` to `x` -<2> declare `def y`; - load from `x` -> `def`; - implicit cast `def` to `boolean false` -> `boolean true`; - boolean or `boolean false` and `boolean true` -> `boolean true`; - implicit cast `boolean true` to `def` -> `def`; - store `def` to `y` -<3> implicit cast `boolean false` to `def` -> `def`; - store `def` to `y`; -<4> load from `y` -> `def`; - implicit cast `def` to `boolean false @0` -> `boolean false @0`; - load from `x` -> `def`; - implicit cast `def` to `boolean false @1` -> `boolean false @1`; - boolean or `boolean false @0` and `boolean false @1` -> `boolean false`; - implicit cast `boolean false` -> `def`; - store `def` to `y` diff --git a/docs/painless/painless-lang-spec/painless-operators-general.asciidoc b/docs/painless/painless-lang-spec/painless-operators-general.asciidoc deleted file mode 100644 index 14e59e44d643f..0000000000000 --- a/docs/painless/painless-lang-spec/painless-operators-general.asciidoc +++ /dev/null @@ -1,432 +0,0 @@ -[[painless-operators-general]] -=== Operators: General - -[[precedence-operator]] -==== Precedence - -Use the `precedence operator '()'` to guarantee the order of evaluation for an -expression. An expression encapsulated by the precedence operator (enclosed in -parentheses) overrides existing precedence relationships between operators and -is evaluated prior to other expressions in inward-to-outward order. - -*Grammar* - -[source,ANTLR4] ----- -precedence: '(' expression ')'; ----- - -*Examples* - -* Precedence with numeric operators. -+ -[source,Painless] ----- -int x = (5+4)*6; <1> -int y = 12/(x-50); <2> ----- -+ -<1> declare `int x`; - add `int 5` and `int 4` -> `int 9`; - multiply `int 9` and `int 6` -> `int 54`; - store `int 54` to `x`; - (note the add is evaluated before the multiply due to the precedence - operator) -<2> declare `int y`; - load from `x` -> `int 54`; - subtract `int 50` from `int 54` -> `int 4`; - divide `int 12` by `int 4` -> `int 3`; - store `int 3` to `y`; - (note the subtract is evaluated before the divide due to the precedence - operator) - -[[function-call-operator]] -==== Function Call - -Use the `function call operator ()` to call an existing function. A -<> is defined within a script. - -*Grammar* - -[source,ANTLR4] ----- -function_call: ID '(' ( expression (',' expression)* )? ')''; ----- - -*Examples* - -* A function call. -+ -[source,Painless] ----- -int add(int x, int y) { <1> - return x + y; - } - -int z = add(1, 2); <2> ----- -+ -<1> define function `add` that returns `int` and has parameters (`int x`, - `int y`) -<2> declare `int z`; - call `add` with arguments (`int 1`, `int 2`) -> `int 3`; - store `int 3` to `z` - -[[cast-operator]] -==== Cast - -An explicit cast converts the value of an original type to the equivalent value -of a target type forcefully as an operation. Use the `cast operator '()'` to -specify an explicit cast. Refer to <> for more -information. - -[[conditional-operator]] -==== Conditional - -A conditional consists of three expressions. The first expression is evaluated -with an expected boolean result type. If the first expression evaluates to true -then the second expression will be evaluated. If the first expression evaluates -to false then the third expression will be evaluated. The second and third -expressions will be <> if the evaluated values are not the -same type. 
Use the `conditional operator '? :'` as a shortcut to avoid the need -for a full if/else branch in certain expressions. - -*Errors* - -* If the first expression does not evaluate to a boolean type value. -* If the values for the second and third expressions cannot be promoted. - -*Grammar* - -[source,ANTLR4] ----- -conditional: expression '?' expression ':' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | byte | short | char | int | long | float | double | Reference | def -| byte | int | int | int | int | long | float | double | - | def -| short | int | int | int | int | long | float | double | - | def -| char | int | int | int | int | long | float | double | - | def -| int | int | int | int | int | long | float | double | - | def -| long | long | long | long | long | long | float | double | - | def -| float | float | float | float | float | float | float | double | - | def -| double | double | double | double | double | double | double | double | - | def -| Reference | - | - | - | - | - | - | - | Object @ | def -| def | def | def | def | def | def | def | def | def | def -|==== - -@ If the two reference type values are the same then this promotion will not -occur. - -*Examples* - -* Evaluation of conditionals. -+ -[source,Painless] ----- -boolean b = true; <1> -int x = b ? 1 : 2; <2> -List y = x > 1 ? new ArrayList() : null; <3> -def z = x < 2 ? x : 2.0; <4> ----- -+ -<1> declare `boolean b`; - store `boolean true` to `b` -<2> declare `int x`; - load from `b` -> `boolean true` - evaluate 1st expression: `int 1` -> `int 1`; - store `int 1` to `x` -<3> declare `List y`; - load from `x` -> `int 1`; - `int 1` greater than `int 1` -> `boolean false`; - evaluate 2nd expression: `null` -> `null`; - store `null` to `y`; -<4> declare `def z`; - load from `x` -> `int 1`; - `int 1` less than `int 2` -> `boolean true`; - evaluate 1st expression: load from `x` -> `int 1`; - promote `int 1` and `double 2.0`: result `double`; - implicit cast `int 1` to `double 1.0` -> `double 1.0`; - implicit cast `double 1.0` to `def` -> `def`; - store `def` to `z`; - -[[assignment-operator]] -==== Assignment - -Use the `assignment operator '='` to store a value in a variable or reference -type member field for use in subsequent operations. Any operation that produces -a value can be assigned to any variable/field as long as the -<> are the same or the resultant type can be -<> to the variable/field type. - -See <> for examples using variables. - -*Errors* - -* If the type of value is unable to match the type of variable or field. - -*Grammar* - -[source,ANTLR4] ----- -assignment: field '=' expression ----- - -*Examples* - -The examples use the following reference type definition: - -[source,Painless] ----- -name: - Example - -non-static member fields: - * int x - * def y - * List z ----- - -* Field assignments of different type values. 
-+ -[source,Painless] ----- -Example example = new Example(); <1> -example.x = 1; <2> -example.y = 2.0; <3> -example.z = new ArrayList(); <4> ----- -+ -<1> declare `Example example`; - allocate `Example` instance -> `Example reference`; - store `Example reference` to `example` -<2> load from `example` -> `Example reference`; - store `int 1` to `x` of `Example reference` -<3> load from `example` -> `Example reference`; - implicit cast `double 2.0` to `def` -> `def`; - store `def` to `y` of `Example reference` -<4> load from `example` -> `Example reference`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `z` of `Example reference` -+ -* A field assignment from a field access. -+ -[source,Painless] ----- -Example example = new Example(); <1> -example.x = 1; <2> -example.y = example.x; <3> ----- -+ -<1> declare `Example example`; - allocate `Example` instance -> `Example reference`; - store `Example reference` to `example` -<2> load from `example` -> `Example reference`; - store `int 1` to `x` of `Example reference` -<3> load from `example` -> `Example reference @0`; - load from `example` -> `Example reference @1`; - load from `x` of `Example reference @1` -> `int 1`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `y` of `Example reference @0`; - (note `Example reference @0` and `Example reference @1` are the same) - -[[compound-assignment-operator]] -==== Compound Assignment - -Use the `compound assignment operator '$='` as a shortcut for an assignment -where a binary operation would occur between the variable/field as the -left-hand side expression and a separate right-hand side expression. - -A compound assignment is equivalent to the expression below where V is the -variable/field and T is the type of variable/member. - -[source,Painless] ----- -V = (T)(V op expression); ----- - -*Operators* - -The table below shows the available operators for use in a compound assignment. -Each operator follows the casting/promotion rules according to their regular -definition. For numeric operations there is an extra implicit cast when -necessary to return the promoted numeric type value to the original numeric type -value of the variable/field and can result in data loss. - -|==== -|Operator|Compound Symbol -|Multiplication|*= -|Division|/= -|Remainder|%= -|Addition|+= -|Subtraction|-= -|Left Shift|+++<<=+++ -|Right Shift|>>= -|Unsigned Right Shift|>>>= -|Bitwise And|&= -|Boolean And|&= -|Bitwise Xor|^= -|Boolean Xor|^= -|Bitwise Or|\|= -|Boolean Or|\|= -|String Concatenation|+= -|==== - -*Errors* - -* If the type of value is unable to match the type of variable or field. - -*Grammar* - -[source,ANTLR4] ----- -compound_assignment: ( ID | field ) '$=' expression; ----- - -Note the use of the `$=` represents the use of any of the possible binary -operators. - -*Examples* - -* Compound assignment for each numeric operator. 
-+ -[source,Painless] ----- -int i = 10; <1> -i *= 2; <2> -i /= 5; <3> -i %= 3; <4> -i += 5; <5> -i -= 5; <6> -i <<= 2; <7> -i >>= 1; <8> -i >>>= 1; <9> -i &= 15; <10> -i ^= 12; <11> -i |= 2; <12> ----- -+ -<1> declare `int i`; - store `int 10` to `i` -<2> load from `i` -> `int 10`; - multiply `int 10` and `int 2` -> `int 20`; - store `int 20` to `i`; - (note this is equivalent to `i = i*2`) -<3> load from `i` -> `int 20`; - divide `int 20` by `int 5` -> `int 4`; - store `int 4` to `i`; - (note this is equivalent to `i = i/5`) -<4> load from `i` -> `int 4`; - remainder `int 4` by `int 3` -> `int 1`; - store `int 1` to `i`; - (note this is equivalent to `i = i%3`) -<5> load from `i` -> `int 1`; - add `int 1` and `int 5` -> `int 6`; - store `int 6` to `i`; - (note this is equivalent to `i = i+5`) -<6> load from `i` -> `int 6`; - subtract `int 5` from `int 6` -> `int 1`; - store `int 1` to `i`; - (note this is equivalent to `i = i-5`) -<7> load from `i` -> `int 1`; - left shift `int 1` by `int 2` -> `int 4`; - store `int 4` to `i`; - (note this is equivalent to `i = i<<2`) -<8> load from `i` -> `int 4`; - right shift `int 4` by `int 1` -> `int 2`; - store `int 2` to `i`; - (note this is equivalent to `i = i>>1`) -<9> load from `i` -> `int 2`; - unsigned right shift `int 2` by `int 1` -> `int 1`; - store `int 1` to `i`; - (note this is equivalent to `i = i>>>1`) -<10> load from `i` -> `int 1`; - bitwise and `int 1` and `int 15` -> `int 1`; - store `int 1` to `i`; - (note this is equivalent to `i = i&2`) -<11> load from `i` -> `int 1`; - bitwise xor `int 1` and `int 12` -> `int 13`; - store `int 13` to `i`; - (note this is equivalent to `i = i^2`) -<12> load from `i` -> `int 13`; - bitwise or `int 13` and `int 2` -> `int 15`; - store `int 15` to `i`; - (note this is equivalent to `i = i|2`) -+ -* Compound assignment for each boolean operator. -+ -[source,Painless] ----- -boolean b = true; <1> -b &= false; <2> -b ^= false; <3> -b |= true; <4> ----- -+ -<1> declare `boolean b`; - store `boolean true` in `b`; -<2> load from `b` -> `boolean true`; - boolean and `boolean true` and `boolean false` -> `boolean false`; - store `boolean false` to `b`; - (note this is equivalent to `b = b && false`) -<3> load from `b` -> `boolean false`; - boolean xor `boolean false` and `boolean false` -> `boolean false`; - store `boolean false` to `b`; - (note this is equivalent to `b = b ^ false`) -<4> load from `b` -> `boolean true`; - boolean or `boolean false` and `boolean true` -> `boolean true`; - store `boolean true` to `b`; - (note this is equivalent to `b = b || true`) -+ -* A compound assignment with the string concatenation operator. -+ -[source,Painless] ----- -String s = 'compound'; <1> -s += ' assignment'; <2> ----- -<1> declare `String s`; - store `String 'compound'` to `s`; -<2> load from `s` -> `String 'compound'`; - string concat `String 'compound'` and `String ' assignment''` - -> `String 'compound assignment'`; - store `String 'compound assignment'` to `s`; - (note this is equivalent to `s = s + ' assignment'`) -+ -* A compound assignment with the `def` type. -+ -[source,Painless] ----- -def x = 1; <1> -x += 2; <2> ----- -<1> declare `def x`; - implicit cast `int 1` to `def`; - store `def` to `x`; -<2> load from `x` -> `def`; - implicit cast `def` to `int 1` -> `int 1`; - add `int 1` and `int 2` -> `int 3`; - implicit cast `int 3` to `def` -> `def`; - store `def` to `x`; - (note this is equivalent to `x = x+2`) -+ -* A compound assignment with an extra implicit cast. 
-+ -[source,Painless] ----- -byte b = 1; <1> -b += 2; <2> ----- -<1> declare `byte b`; - store `byte 1` to `x`; -<2> load from `x` -> `byte 1`; - implicit cast `byte 1 to `int 1` -> `int 1`; - add `int 1` and `int 2` -> `int 3`; - implicit cast `int 3` to `byte 3` -> `byte 3`; - store `byte 3` to `b`; - (note this is equivalent to `b = b+2`) diff --git a/docs/painless/painless-lang-spec/painless-operators-numeric.asciidoc b/docs/painless/painless-lang-spec/painless-operators-numeric.asciidoc deleted file mode 100644 index f145dca19bc1f..0000000000000 --- a/docs/painless/painless-lang-spec/painless-operators-numeric.asciidoc +++ /dev/null @@ -1,1339 +0,0 @@ -[[painless-operators-numeric]] -=== Operators: Numeric - -[[post-increment-operator]] -==== Post Increment - -Use the `post increment operator '++'` to INCREASE the value of a numeric type -variable/field by `1`. An extra implicit cast is necessary to return the -promoted numeric type value to the original numeric type value of the -variable/field for the following types: `byte`, `short`, and `char`. If a -variable/field is read as part of an expression the value is loaded prior to the -increment. - -*Errors* - -* If the variable/field is a non-numeric type. - -*Grammar* - -[source,ANTLR4] ----- -post_increment: ( variable | field ) '++'; ----- - -*Promotion* - -[options="header",cols="<1,<1,<1"] -|==== -| original | promoted | implicit -| byte | int | byte -| short | int | short -| char | int | char -| int | int | -| long | long | -| float | float | -| double | double | -| def | def | -|==== - -*Examples* - -* Post increment with different numeric types. -+ -[source,Painless] ----- -short i = 0; <1> -i++; <2> -long j = 1; <3> -long k; <4> -k = j++; <5> ----- -+ -<1> declare `short i`; - store `short 0` to `i` -<2> load from `i` -> `short 0`; - promote `short 0`: result `int`; - add `int 0` and `int 1` -> `int 1`; - implicit cast `int 1` to `short 1`; - store `short 1` to `i` -<3> declare `long j`; - implicit cast `int 1` to `long 1` -> `long 1`; - store `long 1` to `j` -<4> declare `long k`; - store default `long 0` to `k` -<5> load from `j` -> `long 1`; - store `long 1` to `k`; - add `long 1` and `long 1` -> `long 2`; - store `long 2` to `j` -+ -* Post increment with the `def` type. -+ -[source,Painless] ----- -def x = 1; <1> -x++; <2> ----- -+ -<1> declare `def x`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `x` -<2> load from `x` -> `def`; - implicit cast `def` to `int 1`; - add `int 1` and `int 1` -> `int 2`; - implicit cast `int 2` to `def`; - store `def` to `x` - -[[post-decrement-operator]] -==== Post Decrement - -Use the `post decrement operator '--'` to DECREASE the value of a numeric type -variable/field by `1`. An extra implicit cast is necessary to return the -promoted numeric type value to the original numeric type value of the -variable/field for the following types: `byte`, `short`, and `char`. If a -variable/field is read as part of an expression the value is loaded prior to -the decrement. - -*Errors* - -* If the variable/field is a non-numeric type. - -*Grammar* - -[source,ANTLR4] ----- -post_decrement: ( variable | field ) '--'; ----- - -*Promotion* - -[options="header",cols="<1,<1,<1"] -|==== -| original | promoted | implicit -| byte | int | byte -| short | int | short -| char | int | char -| int | int | -| long | long | -| float | float | -| double | double | -| def | def | -|==== - -*Examples* - -* Post decrement with different numeric types. 
-+ -[source,Painless] ----- -short i = 0; <1> -i--; <2> -long j = 1; <3> -long k; <4> -k = j--; <5> ----- -+ -<1> declare `short i`; - store `short 0` to `i` -<2> load from `i` -> `short 0`; - promote `short 0`: result `int`; - subtract `int 1` from `int 0` -> `int -1`; - implicit cast `int -1` to `short -1`; - store `short -1` to `i` -<3> declare `long j`; - implicit cast `int 1` to `long 1` -> `long 1`; - store `long 1` to `j` -<4> declare `long k`; - store default `long 0` to `k` -<5> load from `j` -> `long 1`; - store `long 1` to `k`; - subtract `long 1` from `long 1` -> `long 0`; - store `long 0` to `j` -+ -* Post decrement with the `def` type. -+ -[source,Painless] ----- -def x = 1; <1> -x--; <2> ----- -+ -<1> declare `def x`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `x` -<2> load from `x` -> `def`; - implicit cast `def` to `int 1`; - subtract `int 1` from `int 1` -> `int 0`; - implicit cast `int 0` to `def`; - store `def` to `x` - -[[pre-increment-operator]] -==== Pre Increment - -Use the `pre increment operator '++'` to INCREASE the value of a numeric type -variable/field by `1`. An extra implicit cast is necessary to return the -promoted numeric type value to the original numeric type value of the -variable/field for the following types: `byte`, `short`, and `char`. If a -variable/field is read as part of an expression the value is loaded after the -increment. - -*Errors* - -* If the variable/field is a non-numeric type. - -*Grammar* - -[source,ANTLR4] ----- -pre_increment: '++' ( variable | field ); ----- - -*Promotion* - -[options="header",cols="<1,<1,<1"] -|==== -| original | promoted | implicit -| byte | int | byte -| short | int | short -| char | int | char -| int | int | -| long | long | -| float | float | -| double | double | -| def | def | -|==== - -*Examples* - -* Pre increment with different numeric types. -+ -[source,Painless] ----- -short i = 0; <1> -++i; <2> -long j = 1; <3> -long k; <4> -k = ++j; <5> ----- -+ -<1> declare `short i`; - store `short 0` to `i` -<2> load from `i` -> `short 0`; - promote `short 0`: result `int`; - add `int 0` and `int 1` -> `int 1`; - implicit cast `int 1` to `short 1`; - store `short 1` to `i` -<3> declare `long j`; - implicit cast `int 1` to `long 1` -> `long 1`; - store `long 1` to `j` -<4> declare `long k`; - store default `long 0` to `k` -<5> load from `j` -> `long 1`; - add `long 1` and `long 1` -> `long 2`; - store `long 2` to `j`; - store `long 2` to `k` -+ -* Pre increment with the `def` type. -+ -[source,Painless] ----- -def x = 1; <1> -++x; <2> ----- -+ -<1> declare `def x`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `x` -<2> load from `x` -> `def`; - implicit cast `def` to `int 1`; - add `int 1` and `int 1` -> `int 2`; - implicit cast `int 2` to `def`; - store `def` to `x` - -[[pre-decrement-operator]] -==== Pre Decrement - -Use the `pre decrement operator '--'` to DECREASE the value of a numeric type -variable/field by `1`. An extra implicit cast is necessary to return the -promoted numeric type value to the original numeric type value of the -variable/field for the following types: `byte`, `short`, and `char`. If a -variable/field is read as part of an expression the value is loaded after the -decrement. - -*Errors* - -* If the variable/field is a non-numeric type. 
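-As a minimal sketch of the load-after-decrement behavior described above (variable names here are illustrative only; the annotated walk-throughs follow under *Examples*):
-
-[source,Painless]
----
-int i = 1;   <1>
-int j = --i; <2>
----
-
-<1> store `int 1` to `i`
-<2> decrement `i` to `int 0` first; then load `int 0` and store `int 0` to `j`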
- -*Grammar* - -[source,ANTLR4] ----- -pre_decrement: '--' ( variable | field ); ----- - -*Promotion* - -[options="header",cols="<1,<1,<1"] -|==== -| original | promoted | implicit -| byte | int | byte -| short | int | short -| char | int | char -| int | int | -| long | long | -| float | float | -| double | double | -| def | def | -|==== - -*Examples* - -* Pre decrement with different numeric types. -+ -[source,Painless] ----- -short i = 0; <1> ---i; <2> -long j = 1; <3> -long k; <4> -k = --j; <5> ----- -+ -<1> declare `short i`; - store `short 0` to `i` -<2> load from `i` -> `short 0`; - promote `short 0`: result `int`; - subtract `int 1` from `int 0` -> `int -1`; - implicit cast `int -1` to `short -1`; - store `short -1` to `i` -<3> declare `long j`; - implicit cast `int 1` to `long 1` -> `long 1`; - store `long 1` to `j` -<4> declare `long k`; - store default `long 0` to `k` -<5> load from `j` -> `long 1`; - subtract `long 1` from `long 1` -> `long 0`; - store `long 0` to `j` - store `long 0` to `k`; -+ -* Pre decrement operator with the `def` type. -+ -[source,Painless] ----- -def x = 1; <1> ---x; <2> ----- -+ -<1> declare `def x`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `x` -<2> load from `x` -> `def`; - implicit cast `def` to `int 1`; - subtract `int 1` from `int 1` -> `int 0`; - implicit cast `int 0` to `def`; - store `def` to `x` - -[[unary-positive-operator]] -==== Unary Positive - -Use the `unary positive operator '+'` to the preserve the IDENTITY of a -numeric type value. - -*Errors* - -* If the value is a non-numeric type. - -*Grammar* - -[source,ANTLR4] ----- -unary_positive: '+' expression; ----- - -*Examples* - -* Unary positive with different numeric types. -+ -[source,Painless] ----- -int x = +1; <1> -long y = +x; <2> ----- -+ -<1> declare `int x`; - identity `int 1` -> `int 1`; - store `int 1` to `x` -<2> declare `long y`; - load from `x` -> `int 1`; - identity `int 1` -> `int 1`; - implicit cast `int 1` to `long 1` -> `long 1`; - store `long 1` to `y` -+ -* Unary positive with the `def` type. -+ -[source,Painless] ----- -def z = +1; <1> -int i = +z; <2> ----- -<1> declare `def z`; - identity `int 1` -> `int 1`; - implicit cast `int 1` to `def`; - store `def` to `z` -<2> declare `int i`; - load from `z` -> `def`; - implicit cast `def` to `int 1`; - identity `int 1` -> `int 1`; - store `int 1` to `i`; - -[[unary-negative-operator]] -==== Unary Negative - -Use the `unary negative operator '-'` to NEGATE a numeric type value. - -*Errors* - -* If the value is a non-numeric type. - -*Grammar* - -[source,ANTLR4] ----- -unary_negative: '-' expression; ----- - -*Examples* - -* Unary negative with different numeric types. -+ -[source,Painless] ----- -int x = -1; <1> -long y = -x; <2> ----- -+ -<1> declare `int x`; - negate `int 1` -> `int -1`; - store `int -1` to `x` -<2> declare `long y`; - load from `x` -> `int 1`; - negate `int -1` -> `int 1`; - implicit cast `int 1` to `long 1` -> `long 1`; - store `long 1` to `y` -+ -* Unary negative with the `def` type. 
-+ -[source,Painless] ----- -def z = -1; <1> -int i = -z; <2> ----- -<1> declare `def z`; - negate `int 1` -> `int -1`; - implicit cast `int -1` to `def`; - store `def` to `z` -<2> declare `int i`; - load from `z` -> `def`; - implicit cast `def` to `int -1`; - negate `int -1` -> `int 1`; - store `int 1` to `i`; - -[[bitwise-not-operator]] -==== Bitwise Not - -Use the `bitwise not operator '~'` to NOT each bit in an integer type value -where a `1-bit` is flipped to a resultant `0-bit` and a `0-bit` is flipped to a -resultant `1-bit`. - -*Errors* - -* If the value is a non-integer type. - -*Bits* - -[options="header",cols="<1,<1"] -|==== -| original | result -| 1 | 0 -| 0 | 1 -|==== - -*Grammar* - -[source,ANTLR4] ----- -bitwise_not: '~' expression; ----- - -*Promotion* - -[options="header",cols="<1,<1"] -|==== -| original | promoted -| byte | int -| short | int -| char | int -| int | int -| long | long -| def | def -|==== - -*Examples* - -* Bitwise not with different numeric types. -+ -[source,Painless] ----- -byte b = 1; <1> -int i = ~b; <2> -long l = ~i; <3> ----- -+ -<1> declare `byte x`; - store `byte 1` to b -<2> declare `int i`; - load from `b` -> `byte 1`; - implicit cast `byte 1` to `int 1` -> `int 1`; - bitwise not `int 1` -> `int -2`; - store `int -2` to `i` -<3> declare `long l`; - load from `i` -> `int -2`; - implicit cast `int -2` to `long -2` -> `long -2`; - bitwise not `long -2` -> `long 1`; - store `long 1` to `l` -+ -* Bitwise not with the `def` type. -+ -[source,Painless] ----- -def d = 1; <1> -def e = ~d; <2> ----- -+ -<1> declare `def d`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `d`; -<2> declare `def e`; - load from `d` -> `def`; - implicit cast `def` to `int 1` -> `int 1`; - bitwise not `int 1` -> `int -2`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `e` - -[[multiplication-operator]] -==== Multiplication - -Use the `multiplication operator '*'` to MULTIPLY together two numeric type -values. Rules for resultant overflow and NaN values follow the JVM -specification. - -*Errors* - -* If either of the values is a non-numeric type. - -*Grammar* - -[source,ANTLR4] ----- -multiplication: expression '*' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | byte | short | char | int | long | float | double | def -| byte | int | int | int | int | long | float | double | def -| short | int | int | int | int | long | float | double | def -| char | int | int | int | int | long | float | double | def -| int | int | int | int | int | long | float | double | def -| long | long | long | long | long | long | float | double | def -| float | float | float | float | float | float | float | double | def -| double | double | double | double | double | double | double | double | def -| def | def | def | def | def | def | def | def | def -|==== - -*Examples* - -* Multiplication with different numeric types. -+ -[source,Painless] ----- -int i = 5*4; <1> -double d = i*7.0; <2> ----- -+ -<1> declare `int i`; - multiply `int 4` by `int 5` -> `int 20`; - store `int 20` in `i` -<2> declare `double d`; - load from `int i` -> `int 20`; - promote `int 20` and `double 7.0`: result `double`; - implicit cast `int 20` to `double 20.0` -> `double 20.0`; - multiply `double 20.0` by `double 7.0` -> `double 140.0`; - store `double 140.0` to `d` -+ -* Multiplication with the `def` type. 
-+ -[source,Painless] ----- -def x = 5*4; <1> -def y = x*2; <2> ----- -<1> declare `def x`; - multiply `int 5` by `int 4` -> `int 20`; - implicit cast `int 20` to `def` -> `def`; - store `def` in `x` -<2> declare `def y`; - load from `x` -> `def`; - implicit cast `def` to `int 20`; - multiply `int 20` by `int 2` -> `int 40`; - implicit cast `int 40` to `def` -> `def`; - store `def` to `y` - -[[division-operator]] -==== Division - -Use the `division operator '/'` to DIVIDE one numeric type value by another. -Rules for NaN values and division by zero follow the JVM specification. Division -with integer values drops the remainder of the resultant value. - -*Errors* - -* If either of the values is a non-numeric type. -* If a left-hand side integer type value is divided by a right-hand side integer - type value of `0`. - -*Grammar* - -[source,ANTLR4] ----- -division: expression '/' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | byte | short | char | int | long | float | double | def -| byte | int | int | int | int | long | float | double | def -| short | int | int | int | int | long | float | double | def -| char | int | int | int | int | long | float | double | def -| int | int | int | int | int | long | float | double | def -| long | long | long | long | long | long | float | double | def -| float | float | float | float | float | float | float | double | def -| double | double | double | double | double | double | double | double | def -| def | def | def | def | def | def | def | def | def -|==== - -*Examples* - -* Division with different numeric types. -+ -[source,Painless] ----- -int i = 29/4; <1> -double d = i/7.0; <2> ----- -+ -<1> declare `int i`; - divide `int 29` by `int 4` -> `int 7`; - store `int 7` in `i` -<2> declare `double d`; - load from `int i` -> `int 7`; - promote `int 7` and `double 7.0`: result `double`; - implicit cast `int 7` to `double 7.0` -> `double 7.0`; - divide `double 7.0` by `double 7.0` -> `double 1.0`; - store `double 1.0` to `d` -+ -* Division with the `def` type. -+ -[source,Painless] ----- -def x = 5/4; <1> -def y = x/2; <2> ----- -<1> declare `def x`; - divide `int 5` by `int 4` -> `int 1`; - implicit cast `int 1` to `def` -> `def`; - store `def` in `x` -<2> declare `def y`; - load from `x` -> `def`; - implicit cast `def` to `int 1`; - divide `int 1` by `int 2` -> `int 0`; - implicit cast `int 0` to `def` -> `def`; - store `def` to `y` - -[[remainder-operator]] -==== Remainder - -Use the `remainder operator '%'` to calculate the REMAINDER for division -between two numeric type values. Rules for NaN values and division by zero follow the JVM -specification. - -*Errors* - -* If either of the values is a non-numeric type. - -*Grammar* - -[source,ANTLR4] ----- -remainder: expression '%' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1"] -|==== -| | byte | short | char | int | long | float | double | def -| byte | int | int | int | int | long | float | double | def -| short | int | int | int | int | long | float | double | def -| char | int | int | int | int | long | float | double | def -| int | int | int | int | int | long | float | double | def -| long | long | long | long | long | long | float | double | def -| float | float | float | float | float | float | float | double | def -| double | double | double | double | double | double | double | double | def -| def | def | def | def | def | def | def | def | def -|==== - -*Examples* - -* Remainder with different numeric types. 
-+
-[source,Painless]
-----
-int i = 29%4; <1>
-double d = i%7.0; <2>
-----
-+
-<1> declare `int i`;
-    remainder `int 29` by `int 4` -> `int 1`;
-    store `int 1` in `i`
-<2> declare `double d`;
-    load from `int i` -> `int 1`;
-    promote `int 1` and `double 7.0`: result `double`;
-    implicit cast `int 1` to `double 1.0` -> `double 1.0`;
-    remainder `double 1.0` by `double 7.0` -> `double 1.0`;
-    store `double 1.0` to `d`
-+
-* Remainder with the `def` type.
-+
-[source,Painless]
-----
-def x = 5%4; <1>
-def y = x%2; <2>
-----
-<1> declare `def x`;
-    remainder `int 5` by `int 4` -> `int 1`;
-    implicit cast `int 1` to `def` -> `def`;
-    store `def` in `x`
-<2> declare `def y`;
-    load from `x` -> `def`;
-    implicit cast `def` to `int 1`;
-    remainder `int 1` by `int 2` -> `int 1`;
-    implicit cast `int 1` to `def` -> `def`;
-    store `def` to `y`
-
-[[addition-operator]]
-==== Addition
-
-Use the `addition operator '+'` to ADD together two numeric type values. Rules
-for resultant overflow and NaN values follow the JVM specification.
-
-*Errors*
-
-* If either of the values is a non-numeric type.
-
-*Grammar*
-
-[source,ANTLR4]
-----
-addition: expression '+' expression;
-----
-
-*Promotion*
-
-[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1"]
-|====
-| | byte | short | char | int | long | float | double | def
-| byte | int | int | int | int | long | float | double | def
-| short | int | int | int | int | long | float | double | def
-| char | int | int | int | int | long | float | double | def
-| int | int | int | int | int | long | float | double | def
-| long | long | long | long | long | long | float | double | def
-| float | float | float | float | float | float | float | double | def
-| double | double | double | double | double | double | double | double | def
-| def | def | def | def | def | def | def | def | def
-|====
-
-*Examples*
-
-* Addition operator with different numeric types.
-+
-[source,Painless]
-----
-int i = 29+4; <1>
-double d = i+7.0; <2>
-----
-+
-<1> declare `int i`;
-    add `int 29` and `int 4` -> `int 33`;
-    store `int 33` in `i`
-<2> declare `double d`;
-    load from `int i` -> `int 33`;
-    promote `int 33` and `double 7.0`: result `double`;
-    implicit cast `int 33` to `double 33.0` -> `double 33.0`;
-    add `double 33.0` and `double 7.0` -> `double 40.0`;
-    store `double 40.0` to `d`
-+
-* Addition with the `def` type.
-+
-[source,Painless]
-----
-def x = 5+4; <1>
-def y = x+2; <2>
-----
-<1> declare `def x`;
-    add `int 5` and `int 4` -> `int 9`;
-    implicit cast `int 9` to `def` -> `def`;
-    store `def` in `x`
-<2> declare `def y`;
-    load from `x` -> `def`;
-    implicit cast `def` to `int 9`;
-    add `int 9` and `int 2` -> `int 11`;
-    implicit cast `int 11` to `def` -> `def`;
-    store `def` to `y`
-
-[[subtraction-operator]]
-==== Subtraction
-
-Use the `subtraction operator '-'` to SUBTRACT a right-hand side numeric type
-value from a left-hand side numeric type value. Rules for resultant overflow
-and NaN values follow the JVM specification.
-
-*Errors*
-
-* If either of the values is a non-numeric type.
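NOTE: Editor's illustrative aside, not part of the removed file: the overflow rule cited above, where results follow the JVM specification, means integer arithmetic wraps around in two's complement instead of raising an error. A minimal sketch, assuming only the whitelisted `Integer` constants already used elsewhere in this reference:

[source,Painless]
----
int max = Integer.MAX_VALUE;  // int 2147483647
int wrap = max + 1;           // wraps to Integer.MIN_VALUE (int -2147483648); no exception is thrown
----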
-
-*Grammar*
-
-[source,ANTLR4]
-----
-subtraction: expression '-' expression;
-----
-
-*Promotion*
-
-[cols="<1,^1,^1,^1,^1,^1,^1,^1,^1"]
-|====
-| | byte | short | char | int | long | float | double | def
-| byte | int | int | int | int | long | float | double | def
-| short | int | int | int | int | long | float | double | def
-| char | int | int | int | int | long | float | double | def
-| int | int | int | int | int | long | float | double | def
-| long | long | long | long | long | long | float | double | def
-| float | float | float | float | float | float | float | double | def
-| double | double | double | double | double | double | double | double | def
-| def | def | def | def | def | def | def | def | def
-|====
-
-*Examples*
-
-* Subtraction with different numeric types.
-+
-[source,Painless]
-----
-int i = 29-4; <1>
-double d = i-7.5; <2>
-----
-+
-<1> declare `int i`;
-    subtract `int 4` from `int 29` -> `int 25`;
-    store `int 25` in `i`
-<2> declare `double d`;
-    load from `int i` -> `int 25`;
-    promote `int 25` and `double 7.5`: result `double`;
-    implicit cast `int 25` to `double 25.0` -> `double 25.0`;
-    subtract `double 7.5` from `double 25.0` -> `double 17.5`;
-    store `double 17.5` to `d`
-+
-* Subtraction with the `def` type.
-+
-[source,Painless]
-----
-def x = 5-4; <1>
-def y = x-2; <2>
-----
-<1> declare `def x`;
-    subtract `int 4` from `int 5` -> `int 1`;
-    implicit cast `int 1` to `def` -> `def`;
-    store `def` in `x`
-<2> declare `def y`;
-    load from `x` -> `def`;
-    implicit cast `def` to `int 1`;
-    subtract `int 2` from `int 1` -> `int -1`;
-    implicit cast `int -1` to `def` -> `def`;
-    store `def` to `y`
-
-[[left-shift-operator]]
-==== Left Shift
-
-Use the `left shift operator '<<'` to SHIFT lower order bits to higher order
-bits in a left-hand side integer type value by the distance specified in a
-right-hand side integer type value.
-
-*Errors*
-
-* If either of the values is a non-integer type.
-* If the right-hand side value cannot be cast to an int type.
-
-*Grammar*
-
-[source,ANTLR4]
-----
-left_shift: expression '<<' expression;
-----
-
-*Promotion*
-
-The left-hand side integer type value is promoted as specified in the table
-below. The right-hand side integer type value is always implicitly cast to an
-`int` type value and truncated to the number of bits of the promoted type value.
-
-[options="header",cols="<1,<1"]
-|====
-| original | promoted
-| byte | int
-| short | int
-| char | int
-| int | int
-| long | long
-| def | def
-|====
-
-*Examples*
-
-* Left shift with different integer types.
-+
-[source,Painless]
-----
-int i = 4 << 1; <1>
-long l = i << 2L; <2>
-----
-+
-<1> declare `int i`;
-    left shift `int 4` by `int 1` -> `int 8`;
-    store `int 8` in `i`
-<2> declare `long l`;
-    load from `int i` -> `int 8`;
-    implicit cast `long 2` to `int 2` -> `int 2`;
-    left shift `int 8` by `int 2` -> `int 32`;
-    implicit cast `int 32` to `long 32` -> `long 32`;
-    store `long 32` to `l`
-+
-* Left shift with the `def` type.
-+ -[source,Painless] ----- -def x = 4 << 2; <1> -def y = x << 1; <2> ----- -<1> declare `def x`; - left shift `int 4` by `int 2` -> `int 16`; - implicit cast `int 16` to `def` -> `def`; - store `def` in `x` -<2> declare `def y`; - load from `x` -> `def`; - implicit cast `def` to `int 16`; - left shift `int 16` by `int 1` -> `int 32`; - implicit cast `int 32` to `def` -> `def`; - store `def` to `y` - -[[right-shift-operator]] -==== Right Shift - -Use the `right shift operator '>>'` to SHIFT higher order bits to lower order -bits in a left-hand side integer type value by the distance specified in a -right-hand side integer type value. The highest order bit of the left-hand side -integer type value is preserved. - -*Errors* - -* If either of the values is a non-integer type. -* If the right-hand side value cannot be cast to an int type. - -*Grammar* - -[source,ANTLR4] ----- -right_shift: expression '>>' expression; ----- - -*Promotion* - -The left-hand side integer type value is promoted as specified in the table -below. The right-hand side integer type value is always implicitly cast to an -`int` type value and truncated to the number of bits of the promoted type value. - -[options="header",cols="<1,<1"] -|==== -| original | promoted -| byte | int -| short | int -| char | int -| int | int -| long | long -| def | def -|==== - -*Examples* - -* Right shift with different integer types. -+ -[source,Painless] ----- -int i = 32 >> 1; <1> -long l = i >> 2L; <2> ----- -+ -<1> declare `int i`; - right shift `int 32` by `int 1` -> `int 16`; - store `int 16` in `i` -<2> declare `long l` - load from `int i` -> `int 16`; - implicit cast `long 2` to `int 2` -> `int 2`; - right shift `int 16` by `int 2` -> `int 4`; - implicit cast `int 4` to `long 4` -> `long 4`; - store `long 4` to `l` -+ -* Right shift with the `def` type. -+ -[source,Painless] ----- -def x = 16 >> 2; <1> -def y = x >> 1; <2> ----- -<1> declare `def x`; - right shift `int 16` by `int 2` -> `int 4`; - implicit cast `int 4` to `def` -> `def`; - store `def` in `x` -<2> declare `def y`; - load from `x` -> `def`; - implicit cast `def` to `int 4`; - right shift `int 4` by `int 1` -> `int 2`; - implicit cast `int 2` to `def` -> `def`; - store `def` to `y` - -[[unsigned-right-shift-operator]] -==== Unsigned Right Shift - -Use the `unsigned right shift operator '>>>'` to SHIFT higher order bits to -lower order bits in a left-hand side integer type value by the distance -specified in a right-hand side type integer value. The highest order bit of the -left-hand side integer type value is *not* preserved. - -*Errors* - -* If either of the values is a non-integer type. -* If the right-hand side value cannot be cast to an int type. - -*Grammar* - -[source,ANTLR4] ----- -unsigned_right_shift: expression '>>>' expression; ----- - -*Promotion* - -The left-hand side integer type value is promoted as specified in the table -below. The right-hand side integer type value is always implicitly cast to an -`int` type value and truncated to the number of bits of the promoted type value. - -[options="header",cols="<1,<1"] -|==== -| original | promoted -| byte | int -| short | int -| char | int -| int | int -| long | long -| def | def -|==== - -*Examples* - -* Unsigned right shift with different integer types. 
-+ -[source,Painless] ----- -int i = -1 >>> 29; <1> -long l = i >>> 2L; <2> ----- -+ -<1> declare `int i`; - unsigned right shift `int -1` by `int 29` -> `int 7`; - store `int 7` in `i` -<2> declare `long l` - load from `int i` -> `int 7`; - implicit cast `long 2` to `int 2` -> `int 2`; - unsigned right shift `int 7` by `int 2` -> `int 3`; - implicit cast `int 3` to `long 3` -> `long 3`; - store `long 3` to `l` -+ -* Unsigned right shift with the `def` type. -+ -[source,Painless] ----- -def x = 16 >>> 2; <1> -def y = x >>> 1; <2> ----- -<1> declare `def x`; - unsigned right shift `int 16` by `int 2` -> `int 4`; - implicit cast `int 4` to `def` -> `def`; - store `def` in `x` -<2> declare `def y`; - load from `x` -> `def`; - implicit cast `def` to `int 4`; - unsigned right shift `int 4` by `int 1` -> `int 2`; - implicit cast `int 2` to `def` -> `def`; - store `def` to `y` - -[[bitwise-and-operator]] -==== Bitwise And - -Use the `bitwise and operator '&'` to AND together each bit within two -integer type values where if both bits at the same index are `1` the resultant -bit is `1` and `0` otherwise. - -*Errors* - -* If either of the values is a non-integer type. - -*Bits* - -[cols="^1,^1,^1"] -|==== -| | 1 | 0 -| 1 | 1 | 0 -| 0 | 0 | 0 -|==== - -*Grammar* - -[source,ANTLR4] ----- -bitwise_and: expression '&' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1"] -|==== -| | byte | short | char | int | long | def -| byte | int | int | int | int | long | def -| short | int | int | int | int | long | def -| char | int | int | int | int | long | def -| int | int | int | int | int | long | def -| long | long | long | long | long | long | def -| def | def | def | def | def | def | def -|==== - -*Examples* - -* Bitwise and with different integer types. -+ -[source,Painless] ----- -int i = 5 & 6; <1> -long l = i & 5L; <2> ----- -+ -<1> declare `int i`; - bitwise and `int 5` and `int 6` -> `int 4`; - store `int 4` in `i` -<2> declare `long l` - load from `int i` -> `int 4`; - promote `int 4` and `long 5`: result `long`; - implicit cast `int 4` to `long 4` -> `long 4`; - bitwise and `long 4` and `long 5` -> `long 4`; - store `long 4` to `l` -+ -* Bitwise and with the `def` type. -+ -[source,Painless] ----- -def x = 15 & 6; <1> -def y = x & 5; <2> ----- -<1> declare `def x`; - bitwise and `int 15` and `int 6` -> `int 6`; - implicit cast `int 6` to `def` -> `def`; - store `def` in `x` -<2> declare `def y`; - load from `x` -> `def`; - implicit cast `def` to `int 6`; - bitwise and `int 6` and `int 5` -> `int 4`; - implicit cast `int 4` to `def` -> `def`; - store `def` to `y` - -[[bitwise-xor-operator]] -==== Bitwise Xor - -Use the `bitwise xor operator '^'` to XOR together each bit within two integer -type values where if one bit is a `1` and the other bit is a `0` at the same -index the resultant bit is `1` otherwise the resultant bit is `0`. - -*Errors* - -* If either of the values is a non-integer type. - -*Bits* - -The following table illustrates the resultant bit from the xoring of two bits. 
- -[cols="^1,^1,^1"] -|==== -| | 1 | 0 -| 1 | 0 | 1 -| 0 | 1 | 0 -|==== - -*Grammar* - -[source,ANTLR4] ----- -bitwise_xor: expression '^' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1"] -|==== -| | byte | short | char | int | long | def -| byte | int | int | int | int | long | def -| short | int | int | int | int | long | def -| char | int | int | int | int | long | def -| int | int | int | int | int | long | def -| long | long | long | long | long | long | def -| def | def | def | def | def | def | def -|==== - -*Examples* - -* Bitwise xor with different integer types. -+ -[source,Painless] ----- -int i = 5 ^ 6; <1> -long l = i ^ 5L; <2> ----- -+ -<1> declare `int i`; - bitwise xor `int 5` and `int 6` -> `int 3`; - store `int 3` in `i` -<2> declare `long l` - load from `int i` -> `int 4`; - promote `int 3` and `long 5`: result `long`; - implicit cast `int 3` to `long 3` -> `long 3`; - bitwise xor `long 3` and `long 5` -> `long 6`; - store `long 6` to `l` -+ -* Bitwise xor with the `def` type. -+ -[source,Painless] ----- -def x = 15 ^ 6; <1> -def y = x ^ 5; <2> ----- -<1> declare `def x`; - bitwise xor `int 15` and `int 6` -> `int 9`; - implicit cast `int 9` to `def` -> `def`; - store `def` in `x` -<2> declare `def y`; - load from `x` -> `def`; - implicit cast `def` to `int 9`; - bitwise xor `int 9` and `int 5` -> `int 12`; - implicit cast `int 12` to `def` -> `def`; - store `def` to `y` - -[[bitwise-or-operator]] -==== Bitwise Or - -Use the `bitwise or operator '|'` to OR together each bit within two integer -type values where if at least one bit is a `1` at the same index the resultant -bit is `1` otherwise the resultant bit is `0`. - -*Errors* - -* If either of the values is a non-integer type. - -*Bits* - -The following table illustrates the resultant bit from the oring of two bits. - -[cols="^1,^1,^1"] -|==== -| | 1 | 0 -| 1 | 1 | 1 -| 0 | 1 | 0 -|==== - -*Grammar* - -[source,ANTLR4] ----- -bitwise_or: expression '|' expression; ----- - -*Promotion* - -[cols="<1,^1,^1,^1,^1,^1,^1"] -|==== -| | byte | short | char | int | long | def -| byte | int | int | int | int | long | def -| short | int | int | int | int | long | def -| char | int | int | int | int | long | def -| int | int | int | int | int | long | def -| long | long | long | long | long | long | def -| def | def | def | def | def | def | def -|==== - -*Examples* - -* Bitwise or with different integer types. -+ -[source,Painless] ----- -int i = 5 | 6; <1> -long l = i | 8L; <2> ----- -+ -<1> declare `int i`; - bitwise or `int 5` and `int 6` -> `int 7`; - store `int 7` in `i` -<2> declare `long l` - load from `int i` -> `int 7`; - promote `int 7` and `long 8`: result `long`; - implicit cast `int 7` to `long 7` -> `long 7`; - bitwise or `long 7` and `long 8` -> `long 15`; - store `long 15` to `l` -+ -* Bitwise or with the `def` type. 
-+ -[source,Painless] ----- -def x = 5 ^ 6; <1> -def y = x ^ 8; <2> ----- -<1> declare `def x`; - bitwise or `int 5` and `int 6` -> `int 7`; - implicit cast `int 7` to `def` -> `def`; - store `def` in `x` -<2> declare `def y`; - load from `x` -> `def`; - implicit cast `def` to `int 7`; - bitwise or `int 7` and `int 8` -> `int 15`; - implicit cast `int 15` to `def` -> `def`; - store `def` to `y` diff --git a/docs/painless/painless-lang-spec/painless-operators-reference.asciidoc b/docs/painless/painless-lang-spec/painless-operators-reference.asciidoc deleted file mode 100644 index a2ec8266aa323..0000000000000 --- a/docs/painless/painless-lang-spec/painless-operators-reference.asciidoc +++ /dev/null @@ -1,774 +0,0 @@ -[[painless-operators-reference]] -=== Operators: Reference - -[[method-call-operator]] -==== Method Call - -Use the `method call operator '()'` to call a member method on a -<> value. Implicit -<> is evaluated as necessary per argument -during the method call. When a method call is made on a target `def` type value, -the parameters and return type value are considered to also be of the `def` type -and are evaluated at run-time. - -An overloaded method is one that shares the same name with two or more methods. -A method is overloaded based on arity where the same name is re-used for -multiple methods as long as the number of parameters differs. - -*Errors* - -* If the reference type value is `null`. -* If the member method name doesn't exist for a given reference type value. -* If the number of arguments passed in is different from the number of specified - parameters. -* If the arguments cannot be implicitly cast or implicitly boxed/unboxed to the - correct type values for the parameters. - -*Grammar* - -[source,ANTLR4] ----- -method_call: '.' ID arguments; -arguments: '(' (expression (',' expression)*)? ')'; ----- - -*Examples* - -* Method calls on different reference types. -+ -[source,Painless] ----- -Map m = new HashMap(); <1> -m.put(1, 2); <2> -int z = m.get(1); <3> -def d = new ArrayList(); <4> -d.add(1); <5> -int i = Integer.parseInt(d.get(0).toString()); <6> ----- -+ -<1> declare `Map m`; - allocate `HashMap` instance -> `HashMap reference`; - store `HashMap reference` to `m` -<2> load from `m` -> `Map reference`; - implicit cast `int 1` to `def` -> `def`; - implicit cast `int 2` to `def` -> `def`; - call `put` on `Map reference` with arguments (`int 1`, `int 2`) -<3> declare `int z`; - load from `m` -> `Map reference`; - call `get` on `Map reference` with arguments (`int 1`) -> `def`; - implicit cast `def` to `int 2` -> `int 2`; - store `int 2` to `z` -<4> declare `def d`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList` to `def` -> `def`; - store `def` to `d` -<5> load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference` - call `add` on `ArrayList reference` with arguments (`int 1`); -<6> declare `int i`; - load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference` - call `get` on `ArrayList reference` with arguments (`int 1`) -> `def`; - implicit cast `def` to `Integer 1 reference` -> `Integer 1 reference`; - call `toString` on `Integer 1 reference` -> `String '1'`; - call `parseInt` on `Integer` with arguments (`String '1'`) -> `int 1`; - store `int 1` in `i`; - -[[field-access-operator]] -==== Field Access - -Use the `field access operator '.'` to store a value to or load a value from a -<> member field. 
- -*Errors* - -* If the reference type value is `null`. -* If the member field name doesn't exist for a given reference type value. - -*Grammar* - -[source,ANTLR4] ----- -field_access: '.' ID; ----- - -*Examples* - -The examples use the following reference type definition: - -[source,Painless] ----- -name: - Example - -non-static member fields: - * int x - * def y - * List z ----- - -* Field access with the `Example` type. -+ -[source,Painless] ----- -Example example = new Example(); <1> -example.x = 1; <2> -example.y = example.x; <3> -example.z = new ArrayList(); <4> -example.z.add(1); <5> -example.x = example.z.get(0); <6> ----- -+ -<1> declare `Example example`; - allocate `Example` instance -> `Example reference`; - store `Example reference` to `example` -<2> load from `example` -> `Example reference`; - store `int 1` to `x` of `Example reference` -<3> load from `example` -> `Example reference @0`; - load from `example` -> `Example reference @1`; - load from `x` of `Example reference @1` -> `int 1`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `y` of `Example reference @0`; - (note `Example reference @0` and `Example reference @1` are the same) -<4> load from `example` -> `Example reference`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `z` of `Example reference` -<5> load from `example` -> `Example reference`; - load from `z` of `Example reference` -> `List reference`; - call `add` on `List reference` with arguments (`int 1`) -<6> load from `example` -> `Example reference @0`; - load from `example` -> `Example reference @1`; - load from `z` of `Example reference @1` -> `List reference`; - call `get` on `List reference` with arguments (`int 0`) -> `int 1`; - store `int 1` in `x` of `List reference @0`; - (note `Example reference @0` and `Example reference @1` are the same) - -[[null-safe-operator]] -==== Null Safe - -Use the `null safe operator '?.'` instead of the method call operator or field -access operator to ensure a reference type value is `non-null` before -a method call or field access. A `null` value will be returned if the reference -type value is `null`, otherwise the method call or field access is evaluated. - -*Errors* - -* If the method call return type value or the field access type value is not - a reference type value and is not implicitly castable to a reference type - value. - -*Grammar* - -[source,ANTLR4] ----- -null_safe: null_safe_method_call - | null_safe_field_access - ; - -null_safe_method_call: '?.' ID arguments; -arguments: '(' (expression (',' expression)*)? ')'; - -null_safe_field_access: '?.' ID; ----- - -*Examples* - -The examples use the following reference type definition: - -[source,Painless] ----- -name: - Example - -non-static member methods: - * List factory() - -non-static member fields: - * List x ----- - -* Null safe without a `null` value. 
-+ -[source,Painless] ----- -Example example = new Example(); <1> -List x = example?.factory(); <2> ----- -+ -<1> declare `Example example`; - allocate `Example` instance -> `Example reference`; - store `Example reference` to `example` -<2> declare `List x`; - load from `example` -> `Example reference`; - null safe call `factory` on `Example reference` -> `List reference`; - store `List reference` to `x`; -+ -* Null safe with a `null` value; -+ -[source,Painless] ----- -Example example = null; <1> -List x = example?.x; <2> ----- -<1> declare `Example example`; - store `null` to `example` -<2> declare `List x`; - load from `example` -> `Example reference`; - null safe access `x` on `Example reference` -> `null`; - store `null` to `x`; - (note the *null safe operator* returned `null` because `example` is `null`) - -[[list-initialization-operator]] -==== List Initialization - -Use the `list initialization operator '[]'` to allocate an `List` type instance -to the heap with a set of pre-defined values. Each value used to initialize the -`List` type instance is cast to a `def` type value upon insertion into the -`List` type instance using the `add` method. The order of the specified values -is maintained. - -*Grammar* - -[source,ANTLR4] ----- -list_initialization: '[' expression (',' expression)* ']' - | '[' ']'; ----- - -*Examples* - -* List initialization of an empty `List` type value. -+ -[source,Painless] ----- -List empty = []; <1> ----- -+ -<1> declare `List empty`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `empty` -+ -* List initialization with static values. -+ -[source,Painless] ----- -List list = [1, 2, 3]; <1> ----- -+ -<1> declare `List list`; - allocate `ArrayList` instance -> `ArrayList reference`; - call `add` on `ArrayList reference` with arguments(`int 1`); - call `add` on `ArrayList reference` with arguments(`int 2`); - call `add` on `ArrayList reference` with arguments(`int 3`); - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `list` -+ -* List initialization with non-static values. 
-+ -[source,Painless] ----- -int i = 1; <1> -long l = 2L; <2> -float f = 3.0F; <3> -double d = 4.0; <4> -String s = "5"; <5> -List list = [i, l, f*d, s]; <6> ----- -+ -<1> declare `int i`; - store `int 1` to `i` -<2> declare `long l`; - store `long 2` to `l` -<3> declare `float f`; - store `float 3.0` to `f` -<4> declare `double d`; - store `double 4.0` to `d` -<5> declare `String s`; - store `String "5"` to `s` -<6> declare `List list`; - allocate `ArrayList` instance -> `ArrayList reference`; - load from `i` -> `int 1`; - call `add` on `ArrayList reference` with arguments(`int 1`); - load from `l` -> `long 2`; - call `add` on `ArrayList reference` with arguments(`long 2`); - load from `f` -> `float 3.0`; - load from `d` -> `double 4.0`; - promote `float 3.0` and `double 4.0`: result `double`; - implicit cast `float 3.0` to `double 3.0` -> `double 3.0`; - multiply `double 3.0` and `double 4.0` -> `double 12.0`; - call `add` on `ArrayList reference` with arguments(`double 12.0`); - load from `s` -> `String "5"`; - call `add` on `ArrayList reference` with arguments(`String "5"`); - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `list` - -[[list-access-operator]] -==== List Access - -Use the `list access operator '[]'` as a shortcut for a `set` method call or -`get` method call made on a `List` type value. - -*Errors* - -* If a value other than a `List` type value is accessed. -* If a non-integer type value is used as an index for a `set` method call or - `get` method call. - -*Grammar* - -[source,ANTLR4] ----- -list_access: '[' expression ']' ----- - -*Examples* - -* List access with the `List` type. -+ -[source,Painless] ----- -List list = new ArrayList(); <1> -list.add(1); <2> -list.add(2); <3> -list.add(3); <4> -list[0] = 2; <5> -list[1] = 5; <6> -int x = list[0] + list[1]; <7> -int y = 1; <8> -int z = list[y]; <9> ----- -+ -<1> declare `List list`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `list` -<2> load from `list` -> `List reference`; - call `add` on `List reference` with arguments(`int 1`) -<3> load from `list` -> `List reference`; - call `add` on `List reference` with arguments(`int 2`) -<4> load from `list` -> `List reference`; - call `add` on `List reference` with arguments(`int 3`) -<5> load from `list` -> `List reference`; - call `set` on `List reference` with arguments(`int 0`, `int 2`) -<6> load from `list` -> `List reference`; - call `set` on `List reference` with arguments(`int 1`, `int 5`) -<7> declare `int x`; - load from `list` -> `List reference`; - call `get` on `List reference` with arguments(`int 0`) -> `def`; - implicit cast `def` to `int 2` -> `int 2`; - load from `list` -> `List reference`; - call `get` on `List reference` with arguments(`int 1`) -> `def`; - implicit cast `def` to `int 5` -> `int 5`; - add `int 2` and `int 5` -> `int 7`; - store `int 7` to `x` -<8> declare `int y`; - store `int 1` int `y` -<9> declare `int z`; - load from `list` -> `List reference`; - load from `y` -> `int 1`; - call `get` on `List reference` with arguments(`int 1`) -> `def`; - implicit cast `def` to `int 5` -> `int 5`; - store `int 5` to `z` -+ -* List access with the `def` type. 
-+ -[source,Painless] ----- -def d = new ArrayList(); <1> -d.add(1); <2> -d.add(2); <3> -d.add(3); <4> -d[0] = 2; <5> -d[1] = 5; <6> -def x = d[0] + d[1]; <7> -def y = 1; <8> -def z = d[y]; <9> ----- -+ -<1> declare `List d`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `def` -> `def`; - store `def` to `d` -<2> load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - call `add` on `ArrayList reference` with arguments(`int 1`) -<3> load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - call `add` on `ArrayList reference` with arguments(`int 2`) -<4> load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - call `add` on `ArrayList reference` with arguments(`int 3`) -<5> load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - call `set` on `ArrayList reference` with arguments(`int 0`, `int 2`) -<6> load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - call `set` on `ArrayList reference` with arguments(`int 1`, `int 5`) -<7> declare `def x`; - load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - call `get` on `ArrayList reference` with arguments(`int 0`) -> `def`; - implicit cast `def` to `int 2` -> `int 2`; - load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - call `get` on `ArrayList reference` with arguments(`int 1`) -> `def`; - implicit cast `def` to `int 2` -> `int 2`; - add `int 2` and `int 5` -> `int 7`; - store `int 7` to `x` -<8> declare `int y`; - store `int 1` int `y` -<9> declare `int z`; - load from `d` -> `ArrayList reference`; - load from `y` -> `def`; - implicit cast `def` to `int 1` -> `int 1`; - call `get` on `ArrayList reference` with arguments(`int 1`) -> `def`; - store `def` to `z` - -[[map-initialization-operator]] -==== Map Initialization - -Use the `map initialization operator '[:]'` to allocate a `Map` type instance to -the heap with a set of pre-defined values. Each pair of values used to -initialize the `Map` type instance are cast to `def` type values upon insertion -into the `Map` type instance using the `put` method. - -*Grammar* - -[source,ANTLR4] ----- -map_initialization: '[' key_pair (',' key_pair)* ']' - | '[' ':' ']'; -key_pair: expression ':' expression ----- - -*Examples* - -* Map initialization of an empty `Map` type value. -+ -[source,Painless] ----- -Map empty = [:]; <1> ----- -+ -<1> declare `Map empty`; - allocate `HashMap` instance -> `HashMap reference`; - implicit cast `HashMap reference` to `Map reference` -> `Map reference`; - store `Map reference` to `empty` -+ -* Map initialization with static values. -+ -[source,Painless] ----- -Map map = [1:2, 3:4, 5:6]; <1> ----- -+ -<1> declare `Map map`; - allocate `HashMap` instance -> `HashMap reference`; - call `put` on `HashMap reference` with arguments(`int 1`, `int 2`); - call `put` on `HashMap reference` with arguments(`int 3`, `int 4`); - call `put` on `HashMap reference` with arguments(`int 5`, `int 6`); - implicit cast `HashMap reference` to `Map reference` -> `Map reference`; - store `Map reference` to `map` -+ -* Map initialization with non-static values. 
-+
-[source,Painless]
-----
-byte b = 0; <1>
-int i = 1; <2>
-long l = 2L; <3>
-float f = 3.0F; <4>
-double d = 4.0; <5>
-String s = "5"; <6>
-Map map = [b:i, l:f*d, d:s]; <7>
-----
-+
-<1> declare `byte b`;
-    store `byte 0` to `b`
-<2> declare `int i`;
-    store `int 1` to `i`
-<3> declare `long l`;
-    store `long 2` to `l`
-<4> declare `float f`;
-    store `float 3.0` to `f`
-<5> declare `double d`;
-    store `double 4.0` to `d`
-<6> declare `String s`;
-    store `String "5"` to `s`
-<7> declare `Map map`;
-    allocate `HashMap` instance -> `HashMap reference`;
-    load from `b` -> `byte 0`;
-    load from `i` -> `int 1`;
-    call `put` on `HashMap reference` with arguments(`byte 0`, `int 1`);
-    load from `l` -> `long 2`;
-    load from `f` -> `float 3.0`;
-    load from `d` -> `double 4.0`;
-    promote `float 3.0` and `double 4.0`: result `double`;
-    implicit cast `float 3.0` to `double 3.0` -> `double 3.0`;
-    multiply `double 3.0` and `double 4.0` -> `double 12.0`;
-    call `put` on `HashMap reference` with arguments(`long 2`, `double 12.0`);
-    load from `d` -> `double 4.0`;
-    load from `s` -> `String "5"`;
-    call `put` on `HashMap reference` with
-    arguments(`double 4.0`, `String "5"`);
-    implicit cast `HashMap reference` to `Map reference` -> `Map reference`;
-    store `Map reference` to `map`
-
-[[map-access-operator]]
-==== Map Access
-
-Use the `map access operator '[]'` as a shortcut for a `put` method call or
-`get` method call made on a `Map` type value.
-
-*Errors*
-
-* If a value other than a `Map` type value is accessed.
-
-*Grammar*
-[source,ANTLR4]
-----
-map_access: '[' expression ']'
-----
-
-*Examples*
-
-* Map access with the `Map` type.
-+
-[source,Painless]
-----
-Map map = new HashMap(); <1>
-map['value2'] = 2; <2>
-map['value5'] = 5; <3>
-int x = map['value2'] + map['value5']; <4>
-String y = 'value5'; <5>
-int z = map[y]; <6>
-----
-+
-<1> declare `Map map`;
-    allocate `HashMap` instance -> `HashMap reference`;
-    implicit cast `HashMap reference` to `Map reference` -> `Map reference`;
-    store `Map reference` to `map`
-<2> load from `map` -> `Map reference`;
-    call `put` on `Map reference` with arguments(`String 'value2'`, `int 2`)
-<3> load from `map` -> `Map reference`;
-    call `put` on `Map reference` with arguments(`String 'value5'`, `int 5`)
-<4> declare `int x`;
-    load from `map` -> `Map reference`;
-    call `get` on `Map reference` with arguments(`String 'value2'`) -> `def`;
-    implicit cast `def` to `int 2` -> `int 2`;
-    load from `map` -> `Map reference`;
-    call `get` on `Map reference` with arguments(`String 'value5'`) -> `def`;
-    implicit cast `def` to `int 5` -> `int 5`;
-    add `int 2` and `int 5` -> `int 7`;
-    store `int 7` to `x`
-<5> declare `String y`;
-    store `String 'value5'` to `y`
-<6> declare `int z`;
-    load from `map` -> `Map reference`;
-    load from `y` -> `String 'value5'`;
-    call `get` on `Map reference` with arguments(`String 'value5'`) -> `def`;
-    implicit cast `def` to `int 5` -> `int 5`;
-    store `int 5` to `z`
-+
-* Map access with the `def` type.
-+ -[source,Painless] ----- -def d = new HashMap(); <1> -d['value2'] = 2; <2> -d['value5'] = 5; <3> -int x = d['value2'] + d['value5']; <4> -String y = 'value5'; <5> -def z = d[y]; <6> ----- -+ -<1> declare `def d`; - allocate `HashMap` instance -> `HashMap reference`; - implicit cast `HashMap reference` to `def` -> `def`; - store `def` to `d` -<2> load from `d` -> `def`; - implicit cast `def` to `HashMap reference` -> `HashMap reference`; - call `put` on `HashMap reference` with arguments(`String 'value2'`, `int 2`) -<3> load from `d` -> `def`; - implicit cast `def` to `HashMap reference` -> `HashMap reference`; - call `put` on `HashMap reference` with arguments(`String 'value5'`, `int 5`) -<4> declare `int x`; - load from `d` -> `def`; - implicit cast `def` to `HashMap reference` -> `HashMap reference`; - call `get` on `HashMap reference` with arguments(`String 'value2'`) - -> `def`; - implicit cast `def` to `int 2` -> `int 2`; - load from `d` -> `def`; - call `get` on `HashMap reference` with arguments(`String 'value5'`) - -> `def`; - implicit cast `def` to `int 5` -> `int 5`; - add `int 2` and `int 5` -> `int 7`; - store `int 7` to `x` -<5> declare `String y`; - store `String 'value5'` to `y` -<6> declare `def z`; - load from `d` -> `def`; - load from `y` -> `String 'value5'`; - call `get` on `HashMap reference` with arguments(`String 'value5'`) - -> `def`; - store `def` to `z` - -[[new-instance-operator]] -==== New Instance - -Use the `new instance operator 'new ()'` to allocate a -<> instance to the heap and call a specified -constructor. Implicit <> is evaluated as -necessary per argument during the constructor call. - -An overloaded constructor is one that shares the same name with two or more -constructors. A constructor is overloaded based on arity where the same -reference type name is re-used for multiple constructors as long as the number -of parameters differs. - -*Errors* - -* If the reference type name doesn't exist for instance allocation. -* If the number of arguments passed in is different from the number of specified - parameters. -* If the arguments cannot be implicitly cast or implicitly boxed/unboxed to the - correct type values for the parameters. - -*Grammar* - -[source,ANTLR4] ----- -new_instance: 'new' TYPE '(' (expression (',' expression)*)? ')'; ----- - -*Examples* - -* Allocation of new instances with different types. - -[source,Painless] ----- -Map m = new HashMap(); <1> -def d = new ArrayList(); <2> -def e = new HashMap(m); <3> ----- -<1> declare `Map m`; - allocate `HashMap` instance -> `HashMap reference`; - implicit cast `HashMap reference` to `Map reference` -> `Map reference`; - store `Map reference` to `m`; -<2> declare `def d`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `def` -> `def`; - store `def` to `d`; -<3> declare `def e`; - load from `m` -> `Map reference`; - allocate `HashMap` instance with arguments (`Map reference`) - -> `HashMap reference`; - implicit cast `HashMap reference` to `def` -> `def`; - store `def` to `e`; - -[[string-concatenation-operator]] -==== String Concatenation - -Use the `string concatenation operator '+'` to concatenate two values together -where at least one of the values is a <>. - -*Grammar* - -[source,ANTLR4] ----- -concatenate: expression '+' expression; ----- - -*Examples* - -* String concatenation with different primitive types. 
-+ -[source,Painless] ----- -String x = "con"; <1> -String y = x + "cat"; <2> -String z = 4 + 5 + x; <3> ----- -+ -<1> declare `String x`; - store `String "con"` to `x`; -<2> declare `String y`; - load from `x` -> `String "con"`; - concat `String "con"` and `String "cat"` -> `String "concat"`; - store `String "concat"` to `y` -<3> declare `String z`; - add `int 4` and `int 5` -> `int 9`; - concat `int 9` and `String "9concat"`; - store `String "9concat"` to `z`; - (note the addition is done prior to the concatenation due to precedence and - associativity of the specific operations) -+ -* String concatenation with the `def` type. -+ -[source,Painless] ----- -def d = 2; <1> -d = "con" + d + "cat"; <2> ----- -+ -<1> declare `def`; - implicit cast `int 2` to `def` -> `def`; - store `def` in `d`; -<2> concat `String "con"` and `int 2` -> `String "con2"`; - concat `String "con2"` and `String "cat"` -> `String "con2cat"` - implicit cast `String "con2cat"` to `def` -> `def`; - store `def` to `d`; - (note the switch in type of `d` from `int` to `String`) - -[[elvis-operator]] -==== Elvis - -An elvis consists of two expressions. The first expression is evaluated -with to check for a `null` value. If the first expression evaluates to -`null` then the second expression is evaluated and its value used. If the first -expression evaluates to `non-null` then the resultant value of the first -expression is used. Use the `elvis operator '?:'` as a shortcut for the -conditional operator. - -*Errors* - -* If the first expression or second expression cannot produce a `null` value. - -*Grammar* - -[source,ANTLR4] ----- -elvis: expression '?:' expression; ----- - -*Examples* - -* Elvis with different reference types. -+ -[source,Painless] ----- -List x = new ArrayList(); <1> -List y = x ?: new ArrayList(); <2> -y = null; <3> -List z = y ?: new ArrayList(); <4> ----- -+ -<1> declare `List x`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `x`; -<2> declare `List y`; - load `x` -> `List reference`; - `List reference` equals `null` -> `false`; - evaluate 1st expression: `List reference` -> `List reference`; - store `List reference` to `y` -<3> store `null` to `y`; -<4> declare `List z`; - load `y` -> `List reference`; - `List reference` equals `null` -> `true`; - evaluate 2nd expression: - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `z`; diff --git a/docs/painless/painless-lang-spec/painless-operators.asciidoc b/docs/painless/painless-lang-spec/painless-operators.asciidoc deleted file mode 100644 index 47e086e88d90d..0000000000000 --- a/docs/painless/painless-lang-spec/painless-operators.asciidoc +++ /dev/null @@ -1,64 +0,0 @@ -[[painless-operators]] -=== Operators - -An operator is the most basic action that can be taken to evaluate values in a -script. An expression is one-to-many consecutive operations. Precedence is the -order in which an operator will be evaluated relative to another operator. -Associativity is the direction within an expression in which a specific operator -is evaluated. The following table lists all available operators: - -[cols="<6,<3,^3,^2,^4"] -|==== -| *Operator* | *Category* | *Symbol(s)* | *Precedence* | *Associativity* -| <> | <> | () | 0 | left -> right -| <> | <> | . () | 1 | left -> right -| <> | <> | . | 1 | left -> right -| <> | <> | ?. 
| 1 | left -> right -| <> | <> | () | 1 | left -> right -| <> | <> | [] {} | 1 | left -> right -| <> | <> | [] | 1 | left -> right -| <> | <> | . | 1 | left -> right -| <> | <> | [] | 1 | left -> right -| <> | <> | [] | 1 | left -> right -| <> | <> | [:] | 1 | left -> right -| <> | <> | [] | 1 | left -> right -| <> | <> | ++ | 1 | left -> right -| <> | <> | -- | 1 | left -> right -| <> | <> | ++ | 2 | right -> left -| <> | <> | -- | 2 | right -> left -| <> | <> | + | 2 | right -> left -| <> | <> | - | 2 | right -> left -| <> | <> | ! | 2 | right -> left -| <> | <> | ~ | 2 | right -> left -| <> | <> | () | 3 | right -> left -| <> | <> | new () | 3 | right -> left -| <> | <> | new [] | 3 | right -> left -| <> | <> | * | 4 | left -> right -| <> | <> | / | 4 | left -> right -| <> | <> | % | 4 | left -> right -| <> | <> | + | 5 | left -> right -| <> | <> | + | 5 | left -> right -| <> | <> | - | 5 | left -> right -| <> | <> | << | 6 | left -> right -| <> | <> | >> | 6 | left -> right -| <> | <> | >>> | 6 | left -> right -| <> | <> | > | 7 | left -> right -| <> | <> | >= | 7 | left -> right -| <> | <> | < | 7 | left -> right -| <> | <> | +++<=+++ | 7 | left -> right -| <> | <> | instanceof | 8 | left -> right -| <> | <> | == | 9 | left -> right -| <> | <> | != | 9 | left -> right -| <> | <> | === | 9 | left -> right -| <> | <> | !== | 9 | left -> right -| <> | <> | & | 10 | left -> right -| <> | <> | ^ | 11 | left -> right -| <> | <> | ^ | 11 | left -> right -| <> | <> | \| | 12 | left -> right -| <> | <> | && | 13 | left -> right -| <> | <> | \|\| | 14 | left -> right -| <> | <> | ? : | 15 | right -> left -| <> | <> | ?: | 16 | right -> left -| <> | <> | = | 17 | right -> left -| <> | <> | $= | 17 | right -> left -|==== diff --git a/docs/painless/painless-lang-spec/painless-regexes.asciidoc b/docs/painless/painless-lang-spec/painless-regexes.asciidoc deleted file mode 100644 index 962c4751aab1a..0000000000000 --- a/docs/painless/painless-lang-spec/painless-regexes.asciidoc +++ /dev/null @@ -1,37 +0,0 @@ -[[painless-regexes]] -=== Regexes - -Regular expression constants are directly supported. To ensure fast performance, -this is the only mechanism for creating patterns. Regular expressions -are always constants and compiled efficiently a single time. - -[source,painless] ---------------------------------------------------------- -Pattern p = /[aeiou]/ ---------------------------------------------------------- - -WARNING: A poorly written regular expression can significantly slow performance. -If possible, avoid using regular expressions, particularly in frequently run -scripts. - -[[pattern-flags]] -==== Pattern flags - -You can define flags on patterns in Painless by adding characters after the -trailing `/` like `/foo/i` or `/foo \w #comment/iUx`. 
Painless exposes all of -the flags from Java's -https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html[ -Pattern class] using these characters: - -[cols="<,<,<",options="header",] -|======================================================================= -| Character | Java Constant | Example -|`c` | CANON_EQ | `'å' ==~ /å/c` (open in hex editor to see) -|`i` | CASE_INSENSITIVE | `'A' ==~ /a/i` -|`l` | LITERAL | `'[a]' ==~ /[a]/l` -|`m` | MULTILINE | `'a\nb\nc' =~ /^b$/m` -|`s` | DOTALL (aka single line) | `'a\nb\nc' =~ /.b./s` -|`U` | UNICODE_CHARACTER_CLASS | `'Ɛ' ==~ /\\w/U` -|`u` | UNICODE_CASE | `'Ɛ' ==~ /ɛ/iu` -|`x` | COMMENTS (aka extended) | `'a' ==~ /a #comment/x` -|======================================================================= \ No newline at end of file diff --git a/docs/painless/painless-lang-spec/painless-scripts.asciidoc b/docs/painless/painless-lang-spec/painless-scripts.asciidoc deleted file mode 100644 index 6c01e6cfa9843..0000000000000 --- a/docs/painless/painless-lang-spec/painless-scripts.asciidoc +++ /dev/null @@ -1,6 +0,0 @@ -[[painless-scripts]] -=== Scripts - -Scripts are composed of one-to-many <> and are -run in a sandbox that determines what local variables are immediately available -along with what APIs are allowed. diff --git a/docs/painless/painless-lang-spec/painless-statements.asciidoc b/docs/painless/painless-lang-spec/painless-statements.asciidoc deleted file mode 100644 index b627507fa949f..0000000000000 --- a/docs/painless/painless-lang-spec/painless-statements.asciidoc +++ /dev/null @@ -1,57 +0,0 @@ -[[painless-statements]] -=== Statements - -Painless supports all of Java's https://docs.oracle.com/javase/tutorial/java/nutsandbolts/flow.html[ -control flow statements] except the `switch` statement. - -==== Conditional statements - -===== If / Else - -[source,painless] ---------------------------------------------------------- -if (doc[item].size() == 0) { - // do something if "item" is missing -} else if (doc[item].value == 'something') { - // do something if "item" value is: something -} else { - // do something else -} ---------------------------------------------------------- - -==== Loop statements - -===== For - -Painless also supports the `for in` syntax: - -[source,painless] ---------------------------------------------------------- -for (def item : list) { - // do something -} ---------------------------------------------------------- - -[source,painless] ---------------------------------------------------------- -for (item in list) { - // do something -} ---------------------------------------------------------- - -===== While -[source,painless] ---------------------------------------------------------- -while (ctx._source.item < condition) { - // do something -} ---------------------------------------------------------- - -===== Do-While -[source,painless] ---------------------------------------------------------- -do { - // do something -} -while (ctx._source.item < condition) ---------------------------------------------------------- diff --git a/docs/painless/painless-lang-spec/painless-types.asciidoc b/docs/painless/painless-lang-spec/painless-types.asciidoc deleted file mode 100644 index fca5fed1b12c2..0000000000000 --- a/docs/painless/painless-lang-spec/painless-types.asciidoc +++ /dev/null @@ -1,458 +0,0 @@ -[[painless-types]] -=== Types - -A type is a classification of data used to define the properties of a value. 
-These properties specify what data a value represents and the rules for how a -value is evaluated during an <>. Each type -belongs to one of the following categories: <>, -<>, or <>. - -[[primitive-types]] -==== Primitive Types - -A primitive type represents basic data built natively into the JVM and is -allocated to non-heap memory. Declare a primitive type -<> or access a primitive type member field (from -a reference type instance), and assign it a primitive type value for evaluation -during later operations. The default value for a newly-declared primitive type -variable is listed as part of the definitions below. A primitive type value is -copied during an assignment or as an argument for a method/function call. - -A primitive type has a corresponding reference type (also known as a boxed -type). Use the <> or -<> on a primitive type value to -force evaluation as its corresponding reference type value. - -The following primitive types are available. The corresponding reference type -is listed in parentheses. For example, `Byte` is the reference type for the -`byte` primitive type: - -[[available-primitive-types]] -.**Available primitive types** -[%collapsible%open] -==== -`byte` (`Byte`):: - 8-bit, signed, two's complement integer. Range: [`-128`, `127`]. Default: `0`. - -`short` (`Short`):: - 16-bit, signed, two's complement integer. Range: [`-32768`, `32767`]. Default: `0`. - -`char` (`Character`):: - 16-bit, unsigned, Unicode character. Range: [`0`, `65535`]. Default: `0` or `\u0000`. - -`int` (`Integer`):: - 32-bit, signed, two's complement integer. Range: [`-2^31`, `2^31-1`]. Default: `0`. - -`long` (`Long`):: - 64-bit, signed, two's complement integer. Range: [`-2^63`, `2^63-1`]. Default: `0`. - -`float (`Float`)`:: - 32-bit, signed, single-precision, IEEE 754 floating point number. Default `0.0`. - -`double` (`Double`):: - 64-bit, signed, double-precision, IEEE 754 floating point number. Default: `0.0`. - -`boolean` (`Boolean`):: - logical quantity with two possible values of `true` and `false`. Default: `false`. -==== - -*Examples* - -* Primitive types used in declaration, declaration and assignment. -+ -[source,Painless] ----- -int i = 1; <1> -double d; <2> -boolean b = true; <3> ----- -+ -<1> declare `int i`; - store `int 1` to `i` -<2> declare `double d`; - store default `double 0.0` to `d` -<3> declare `boolean b`; - store `boolean true` to `b` -+ -* Method call on a primitive type using the corresponding reference type. -+ -[source,Painless] ----- -int i = 1; <1> -i.toString(); <2> ----- -+ -<1> declare `int i`; - store `int 1` to `i` -<2> load from `i` -> `int 1`; - box `int 1` -> `Integer 1 reference`; - call `toString` on `Integer 1 reference` -> `String '1'` - -[[reference-types]] -==== Reference Types - -A reference type is a named construct (object), potentially representing -multiple pieces of data (member fields) and logic to manipulate that data -(member methods), defined as part of the application programming interface -(API) for scripts. - -A reference type instance is a single set of data for one reference type -object allocated to the heap. Use the -<> to allocate a reference type -instance. Use a reference type instance to load from, store to, and manipulate -complex data. - -A reference type value refers to a reference type instance, and multiple -reference type values may refer to the same reference type instance. A change to -a reference type instance will affect all reference type values referring to -that specific instance. 
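NOTE: Editor's illustrative aside, not part of the removed file: a minimal sketch of the aliasing behavior described above, using the whitelisted `List` and `ArrayList` types; the reference's own fuller worked examples follow below.

[source,Painless]
----
List a = new ArrayList();  // one ArrayList instance allocated to the heap
List b = a;                // b refers to the same instance as a
a.add(1);                  // mutate the instance through a
int size = b.size();       // size is int 1; the change is visible through b
----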
- -Declare a reference type <> or access a reference -type member field (from a reference type instance), and assign it a reference -type value for evaluation during later operations. The default value for a -newly-declared reference type variable is `null`. A reference type value is -shallow-copied during an assignment or as an argument for a method/function -call. Assign `null` to a reference type variable to indicate the reference type -value refers to no reference type instance. The JVM will garbage collect a -reference type instance when it is no longer referred to by any reference type -values. Pass `null` as an argument to a method/function call to indicate the -argument refers to no reference type instance. - -A reference type object defines zero-to-many of each of the following: - -static member field:: - -A static member field is a named and typed piece of data. Each reference type -*object* contains one set of data representative of its static member fields. -Use the <> in correspondence with -the reference type object name to access a static member field for loading and -storing to a specific reference type *object*. No reference type instance -allocation is necessary to use a static member field. - -non-static member field:: - -A non-static member field is a named and typed piece of data. Each reference -type *instance* contains one set of data representative of its reference type -object's non-static member fields. Use the -<> for loading and storing to a -non-static member field of a specific reference type *instance*. An allocated -reference type instance is required to use a non-static member field. - -static member method:: - -A static member method is a <> called on a -reference type *object*. Use the <> -in correspondence with the reference type object name to call a static member -method. No reference type instance allocation is necessary to use a static -member method. - -non-static member method:: - -A non-static member method is a <> called on a -reference type *instance*. A non-static member method called on a reference type -instance can load from and store to non-static member fields of that specific -reference type instance. Use the <> -in correspondence with a specific reference type instance to call a non-static -member method. An allocated reference type instance is required to use a -non-static member method. - -constructor:: - -A constructor is a special type of <> used to -allocate a reference type *instance* defined by a specific reference type -*object*. Use the <> to allocate -a reference type instance. - -A reference type object follows a basic inheritance model. Consider types A and -B. Type A is considered to be a parent of B, and B a child of A, if B inherits -(is able to access as its own) all of A's non-static members. Type B is -considered a descendant of A if there exists a recursive parent-child -relationship from B to A with none to many types in between. In this case, B -inherits all of A's non-static members along with all of the non-static members -of the types in between. Type B is also considered to be a type A in both -relationships. - -*Examples* - -* Reference types evaluated in several different operations. 
-+ -[source,Painless] ----- -List l = new ArrayList(); <1> -l.add(1); <2> -int i = l.get(0) + 2; <3> ----- -+ -<1> declare `List l`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `l` -<2> load from `l` -> `List reference`; - implicit cast `int 1` to `def` -> `def` - call `add` on `List reference` with arguments (`def`) -<3> declare `int i`; - load from `l` -> `List reference`; - call `get` on `List reference` with arguments (`int 0`) -> `def`; - implicit cast `def` to `int 1` -> `int 1`; - add `int 1` and `int 2` -> `int 3`; - store `int 3` to `i` -+ -* Sharing a reference type instance. -+ -[source,Painless] ----- -List l0 = new ArrayList(); <1> -List l1 = l0; <2> -l0.add(1); <3> -l1.add(2); <4> -int i = l1.get(0) + l0.get(1); <5> ----- -+ -<1> declare `List l0`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `l0` -<2> declare `List l1`; - load from `l0` -> `List reference`; - store `List reference` to `l1` - (note `l0` and `l1` refer to the same instance known as a shallow-copy) -<3> load from `l0` -> `List reference`; - implicit cast `int 1` to `def` -> `def` - call `add` on `List reference` with arguments (`def`) -<4> load from `l1` -> `List reference`; - implicit cast `int 2` to `def` -> `def` - call `add` on `List reference` with arguments (`def`) -<5> declare `int i`; - load from `l0` -> `List reference`; - call `get` on `List reference` with arguments (`int 0`) -> `def @0`; - implicit cast `def @0` to `int 1` -> `int 1`; - load from `l1` -> `List reference`; - call `get` on `List reference` with arguments (`int 1`) -> `def @1`; - implicit cast `def @1` to `int 2` -> `int 2`; - add `int 1` and `int 2` -> `int 3`; - store `int 3` to `i`; -+ -* Using the static members of a reference type. -+ -[source,Painless] ----- -int i = Integer.MAX_VALUE; <1> -long l = Long.parseLong("123L"); <2> ----- -+ -<1> declare `int i`; - load from `MAX_VALUE` on `Integer` -> `int 2147483647`; - store `int 2147483647` to `i` -<2> declare `long l`; - call `parseLong` on `Long` with arguments (`long 123`) -> `long 123`; - store `long 123` to `l` - -[[dynamic-types]] -==== Dynamic Types - -A dynamic type value can represent the value of any primitive type or -reference type using a single type name `def`. A `def` type value mimics -the behavior of whatever value it represents at run-time and will always -represent the child-most descendant type value of any type value when evaluated -during operations. - -Declare a `def` type <> or access a `def` type -member field (from a reference type instance), and assign it any type of value -for evaluation during later operations. The default value for a newly-declared -`def` type variable is `null`. A `def` type variable or method/function -parameter can change the type it represents during the compilation and -evaluation of a script. - -Using the `def` type can have a slight impact on performance. Use only primitive -types and reference types directly when performance is critical. - -*Errors* - -* If a `def` type value represents an inappropriate type for evaluation of an - operation at run-time. - -*Examples* - -* General uses of the `def` type. 
-+ -[source,Painless] ----- -def dp = 1; <1> -def dr = new ArrayList(); <2> -dr = dp; <3> ----- -+ -<1> declare `def dp`; - implicit cast `int 1` to `def` -> `def`; - store `def` to `dp` -<2> declare `def dr`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `def` -> `def`; - store `def` to `dr` -<3> load from `dp` -> `def`; - store `def` to `dr`; - (note the switch in the type `dr` represents from `ArrayList` to `int`) -+ -* A `def` type value representing the child-most descendant of a value. -+ -[source,Painless] ----- -Object l = new ArrayList(); <1> -def d = l; <2> -d.ensureCapacity(10); <3> ----- -+ -<1> declare `Object l`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `Object reference` - -> `Object reference`; - store `Object reference` to `l` -<2> declare `def d`; - load from `l` -> `Object reference`; - implicit cast `Object reference` to `def` -> `def`; - store `def` to `d`; -<3> load from `d` -> `def`; - implicit cast `def` to `ArrayList reference` -> `ArrayList reference`; - call `ensureCapacity` on `ArrayList reference` with arguments (`int 10`); - (note `def` was implicit cast to `ArrayList reference` - since ArrayList` is the child-most descendant type value that the - `def` type value represents) - -[[string-type]] -==== String Type - -The `String` type is a specialized reference type that does not require -explicit allocation. Use a <> to directly -evaluate a `String` type value. While not required, the -<> can allocate `String` type -instances. - -*Examples* - -* General use of the `String` type. -+ -[source,Painless] ----- -String r = "some text"; <1> -String s = 'some text'; <2> -String t = new String("some text"); <3> -String u; <4> ----- -+ -<1> declare `String r`; - store `String "some text"` to `r` -<2> declare `String s`; - store `String 'some text'` to `s` -<3> declare `String t`; - allocate `String` instance with arguments (`String "some text"`) - -> `String "some text"`; - store `String "some text"` to `t` -<4> declare `String u`; - store default `null` to `u` - -[[void-type]] -==== void Type - -The `void` type represents the concept of a lack of type. Use the `void` type to -indicate a function returns no value. - -*Examples* - -* Use of the `void` type in a function. -+ -[source,Painless] ----- -void addToList(List l, def d) { - l.add(d); -} ----- - -[[array-type]] -==== Array Type - -An array type is a specialized reference type where an array type instance -contains a series of values allocated to the heap. Each value in an array type -instance is defined as an element. All elements in an array type instance are of -the same type (element type) specified as part of declaration. Each element is -assigned an index within the range `[0, length)` where length is the total -number of elements allocated for an array type instance. - -Use the <> or the -<> to allocate an -array type instance. Declare an array type <> or -access an array type member field (from a reference type instance), and assign -it an array type value for evaluation during later operations. The default value -for a newly-declared array type variable is `null`. An array type value is -shallow-copied during an assignment or as an argument for a method/function -call. Assign `null` to an array type variable to indicate the array type value -refers to no array type instance. The JVM will garbage collect an array type -instance when it is no longer referred to by any array type values. 
Pass `null` -as an argument to a method/function call to indicate the argument refers to no -array type instance. - -Use the <> to retrieve the length -of an array type value as an `int` type value. Use the -<> to load from and store to -an individual element within an array type instance. - -When an array type instance is allocated with multiple dimensions using the -range `[2, d]` where `d >= 2`, each element within each dimension in the range -`[1, d-1]` is also an array type. The element type of each dimension, `n`, is an -array type with the number of dimensions equal to `d-n`. For example, consider -`int[][][]` with 3 dimensions. Each element in the 3rd dimension, `d-3`, is the -primitive type `int`. Each element in the 2nd dimension, `d-2`, is the array -type `int[]`. And each element in the 1st dimension, `d-1` is the array type -`int[][]`. - -*Examples* - -* General use of single-dimensional arrays. -+ -[source,Painless] ----- -int[] x; <1> -float[] y = new float[10]; <2> -def z = new float[5]; <3> -y[9] = 1.0F; <4> -z[0] = y[9]; <5> ----- -+ -<1> declare `int[] x`; - store default `null` to `x` -<2> declare `float[] y`; - allocate `1-d float array` instance with `length [10]` - -> `1-d float array reference`; - store `1-d float array reference` to `y` -<3> declare `def z`; - allocate `1-d float array` instance with `length [5]` - -> `1-d float array reference`; - implicit cast `1-d float array reference` to `def` -> `def`; - store `def` to `z` -<4> load from `y` -> `1-d float array reference`; - store `float 1.0` to `index [9]` of `1-d float array reference` -<5> load from `y` -> `1-d float array reference @0`; - load from `index [9]` of `1-d float array reference @0` -> `float 1.0`; - load from `z` -> `def`; - implicit cast `def` to `1-d float array reference @1` - -> `1-d float array reference @1`; - store `float 1.0` to `index [0]` of `1-d float array reference @1` -+ -* General use of a multi-dimensional array. -+ -[source,Painless] ----- -int[][][] ia3 = new int[2][3][4]; <1> -ia3[1][2][3] = 99; <2> -int i = ia3[1][2][3]; <3> ----- -+ -<1> declare `int[][][] ia`; - allocate `3-d int array` instance with length `[2, 3, 4]` - -> `3-d int array reference`; - store `3-d int array reference` to `ia3` -<2> load from `ia3` -> `3-d int array reference`; - store `int 99` to `index [1, 2, 3]` of `3-d int array reference` -<3> declare `int i`; - load from `ia3` -> `3-d int array reference`; - load from `index [1, 2, 3]` of `3-d int array reference` -> `int 99`; - store `int 99` to `i` diff --git a/docs/painless/painless-lang-spec/painless-variables.asciidoc b/docs/painless/painless-lang-spec/painless-variables.asciidoc deleted file mode 100644 index d86b8ba17214a..0000000000000 --- a/docs/painless/painless-lang-spec/painless-variables.asciidoc +++ /dev/null @@ -1,204 +0,0 @@ -[[painless-variables]] -=== Variables - -A variable loads and stores a value for evaluation during -<>. - -[[variable-declaration]] -==== Declaration - -Declare a variable before use with the format of <> -followed by <>. Declare an -<> variable using an opening `[` token and a closing `]` -token for each dimension directly after the identifier. Specify a -comma-separated list of identifiers following the type to declare multiple -variables in a single statement. Use an -<> combined with a declaration to -immediately assign a value to a variable. A variable not immediately assigned a -value will have a default value assigned implicitly based on the type. 
- -*Errors* - -* If a variable is used prior to or without declaration. - -*Grammar* - -[source,ANTLR4] ----- -declaration : type ID assignment? (',' ID assignment?)*; -type: ID ('.' ID)* ('[' ']')*; -assignment: '=' expression; ----- - -*Examples* - -* Different variations of variable declaration. -+ -[source,Painless] ----- -int x; <1> -List y; <2> -int x, y = 5, z; <3> -def d; <4> -int i = 10; <5> -float[] f; <6> -Map[][] m; <7> ----- -+ -<1> declare `int x`; - store default `null` to `x` -<2> declare `List y`; - store default `null` to `y` -<3> declare `int x`; - store default `int 0` to `x`; - declare `int y`; - store `int 5` to `y`; - declare `int z`; - store default `int 0` to `z`; -<4> declare `def d`; - store default `null` to `d` -<5> declare `int i`; - store `int 10` to `i` -<6> declare `float[] f`; - store default `null` to `f` -<7> declare `Map[][] m`; - store default `null` to `m` - -[[variable-assignment]] -==== Assignment - -Use the `assignment operator '='` to store a value in a variable for use in -subsequent operations. Any operation that produces a value can be assigned to -any variable as long as the <> are the same or the -resultant type can be <> to the variable -type. - -*Errors* - -* If the type of value is unable to match the type of variable. - -*Grammar* - -[source,ANTLR4] ----- -assignment: ID '=' expression ----- - -*Examples* - -* Variable assignment with an integer literal. -+ -[source,Painless] ----- -int i; <1> -i = 10; <2> ----- -+ -<1> declare `int i`; - store default `int 0` to `i` -<2> store `int 10` to `i` -+ -* Declaration combined with immediate assignment. -+ -[source,Painless] ----- -int i = 10; <1> -double j = 2.0; <2> ----- -+ -<1> declare `int i`; - store `int 10` to `i` -<2> declare `double j`; - store `double 2.0` to `j` -+ -* Assignment of one variable to another using primitive type values. -+ -[source,Painless] ----- -int i = 10; <1> -int j = i; <2> ----- -+ -<1> declare `int i`; - store `int 10` to `i` -<2> declare `int j`; - load from `i` -> `int 10`; - store `int 10` to `j` -+ -* Assignment with reference types using the - <>. -+ -[source,Painless] ----- -ArrayList l = new ArrayList(); <1> -Map m = new HashMap(); <2> ----- -+ -<1> declare `ArrayList l`; - allocate `ArrayList` instance -> `ArrayList reference`; - store `ArrayList reference` to `l` -<2> declare `Map m`; - allocate `HashMap` instance -> `HashMap reference`; - implicit cast `HashMap reference` to `Map reference` -> `Map reference`; - store `Map reference` to `m` -+ -* Assignment of one variable to another using reference type values. -+ -[source,Painless] ----- -List l = new ArrayList(); <1> -List k = l; <2> -List m; <3> -m = k; <4> ----- -+ -<1> declare `List l`; - allocate `ArrayList` instance -> `ArrayList reference`; - implicit cast `ArrayList reference` to `List reference` -> `List reference`; - store `List reference` to `l` -<2> declare `List k`; - load from `l` -> `List reference`; - store `List reference` to `k`; - (note `l` and `k` refer to the same instance known as a shallow-copy) -<3> declare `List m`; - store default `null` to `m` -<4> load from `k` -> `List reference`; - store `List reference` to `m`; - (note `l`, `k`, and `m` refer to the same instance) -+ -* Assignment with array type variables using the - <>. 
-+ -[source,Painless] ----- -int[] ia1; <1> -ia1 = new int[2]; <2> -ia1[0] = 1; <3> -int[] ib1 = ia1; <4> -int[][] ic2 = new int[2][5]; <5> -ic2[1][3] = 2; <6> -ic2[0] = ia1; <7> ----- -+ -<1> declare `int[] ia1`; - store default `null` to `ia1` -<2> allocate `1-d int array` instance with `length [2]` - -> `1-d int array reference`; - store `1-d int array reference` to `ia1` -<3> load from `ia1` -> `1-d int array reference`; - store `int 1` to `index [0]` of `1-d int array reference` -<4> declare `int[] ib1`; - load from `ia1` -> `1-d int array reference`; - store `1-d int array reference` to `ib1`; - (note `ia1` and `ib1` refer to the same instance known as a shallow copy) -<5> declare `int[][] ic2`; - allocate `2-d int array` instance with `length [2, 5]` - -> `2-d int array reference`; - store `2-d int array reference` to `ic2` -<6> load from `ic2` -> `2-d int array reference`; - store `int 2` to `index [1, 3]` of `2-d int array reference` -<7> load from `ia1` -> `1-d int array reference`; - load from `ic2` -> `2-d int array reference`; - store `1-d int array reference` to - `index [0]` of `2-d int array reference`; - (note `ia1`, `ib1`, and `index [0]` of `ia2` refer to the same instance) diff --git a/docs/painless/redirects.asciidoc b/docs/painless/redirects.asciidoc deleted file mode 100644 index 94dd5524e9acd..0000000000000 --- a/docs/painless/redirects.asciidoc +++ /dev/null @@ -1,9 +0,0 @@ -["appendix",role="exclude",id="redirects"] -= Deleted pages - -The following pages have moved or been deleted. - -[role="exclude",id="painless-examples"] -=== Painless examples - -See <>. \ No newline at end of file diff --git a/docs/plugins/analysis-icu.asciidoc b/docs/plugins/analysis-icu.asciidoc deleted file mode 100644 index da7efd2843f50..0000000000000 --- a/docs/plugins/analysis-icu.asciidoc +++ /dev/null @@ -1,557 +0,0 @@ -[[analysis-icu]] -=== ICU analysis plugin - -The ICU Analysis plugin integrates the Lucene ICU module into {es}, -adding extended Unicode support using the https://icu.unicode.org/[ICU] -libraries, including better analysis of Asian languages, Unicode -normalization, Unicode-aware case folding, collation support, and -transliteration. - -[IMPORTANT] -.ICU analysis and backwards compatibility -================================================ - -From time to time, the ICU library receives updates such as adding new -characters and emojis, and improving collation (sort) orders. These changes -may or may not affect search and sort orders, depending on which characters -sets you are using. - -While we restrict ICU upgrades to major versions, you may find that an index -created in the previous major version will need to be reindexed in order to -return correct (and correctly ordered) results, and to take advantage of new -characters. - -================================================ - -:plugin_name: analysis-icu -include::install_remove.asciidoc[] - -[[analysis-icu-analyzer]] -==== ICU analyzer - -The `icu_analyzer` analyzer performs basic normalization, tokenization and character folding, using the -`icu_normalizer` char filter, `icu_tokenizer` and `icu_folding` token filter - -The following parameters are accepted: - -[horizontal] - -`method`:: - - Normalization method. Accepts `nfkc`, `nfc` or `nfkc_cf` (default) - -`mode`:: - - Normalization mode. Accepts `compose` (default) or `decompose`. 
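-
-For example, both parameters can be combined on a custom analyzer of type
-`icu_analyzer`. This is only an illustrative sketch; the index and analyzer
-names (`icu_sample`, `my_icu_analyzer`) are placeholders:
-
-[source,console]
----------------------------------------------------
-PUT icu_sample
-{
-  "settings": {
-    "index": {
-      "analysis": {
-        "analyzer": {
-          "my_icu_analyzer": {  <1>
-            "type": "icu_analyzer",
-            "method": "nfkc",   <2>
-            "mode": "decompose"
-          }
-        }
-      }
-    }
-  }
-}
----------------------------------------------------
-
-<1> An illustrative analyzer name; any name can be used.
-<2> Overrides the default `nfkc_cf` method and `compose` mode described above.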
- -[[analysis-icu-normalization-charfilter]] -==== ICU normalization character filter - -Normalizes characters as explained -https://unicode-org.github.io/icu/userguide/transforms/normalization/[here]. -It registers itself as the `icu_normalizer` character filter, which is -available to all indices without any further configuration. The type of -normalization can be specified with the `name` parameter, which accepts `nfc`, -`nfkc`, and `nfkc_cf` (default). Set the `mode` parameter to `decompose` to -convert `nfc` to `nfd` or `nfkc` to `nfkd` respectively: - -Which letters are normalized can be controlled by specifying the -`unicode_set_filter` parameter, which accepts a -https://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.html[UnicodeSet]. - -Here are two examples, the default usage and a customised character filter: - - -[source,console] --------------------------------------------------- -PUT icu_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "nfkc_cf_normalized": { <1> - "tokenizer": "icu_tokenizer", - "char_filter": [ - "icu_normalizer" - ] - }, - "nfd_normalized": { <2> - "tokenizer": "icu_tokenizer", - "char_filter": [ - "nfd_normalizer" - ] - } - }, - "char_filter": { - "nfd_normalizer": { - "type": "icu_normalizer", - "name": "nfc", - "mode": "decompose" - } - } - } - } - } -} --------------------------------------------------- - -<1> Uses the default `nfkc_cf` normalization. -<2> Uses the customized `nfd_normalizer` token filter, which is set to use `nfc` normalization with decomposition. - -[[analysis-icu-tokenizer]] -==== ICU tokenizer - -Tokenizes text into words on word boundaries, as defined in -https://www.unicode.org/reports/tr29/[UAX #29: Unicode Text Segmentation]. -It behaves much like the {ref}/analysis-standard-tokenizer.html[`standard` tokenizer], -but adds better support for some Asian languages by using a dictionary-based -approach to identify words in Thai, Lao, Chinese, Japanese, and Korean, and -using custom rules to break Myanmar and Khmer text into syllables. - -[source,console] --------------------------------------------------- -PUT icu_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "my_icu_analyzer": { - "tokenizer": "icu_tokenizer" - } - } - } - } - } -} --------------------------------------------------- - -===== Rules customization - -experimental[This functionality is marked as experimental in Lucene] - -You can customize the `icu-tokenizer` behavior by specifying per-script rule files, see the -http://userguide.icu-project.org/boundaryanalysis#TOC-RBBI-Rules[RBBI rules syntax reference] -for a more detailed explanation. - -To add icu tokenizer rules, set the `rule_files` settings, which should contain a comma-separated list of -`code:rulefile` pairs in the following format: -https://unicode.org/iso15924/iso15924-codes.html[four-letter ISO 15924 script code], -followed by a colon, then a rule file name. Rule files are placed `ES_HOME/config` directory. 
- -As a demonstration of how the rule files can be used, save the following user file to `$ES_HOME/config/KeywordTokenizer.rbbi`: - -[source,text] ------------------------ -.+ {200}; ------------------------ - -Then create an analyzer to use this rule file as follows: - -[source,console] --------------------------------------------------- -PUT icu_sample -{ - "settings": { - "index": { - "analysis": { - "tokenizer": { - "icu_user_file": { - "type": "icu_tokenizer", - "rule_files": "Latn:KeywordTokenizer.rbbi" - } - }, - "analyzer": { - "my_analyzer": { - "type": "custom", - "tokenizer": "icu_user_file" - } - } - } - } - } -} - -GET icu_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "Elasticsearch. Wow!" -} --------------------------------------------------- - -The above `analyze` request returns the following: - -[source,console-result] --------------------------------------------------- -{ - "tokens": [ - { - "token": "Elasticsearch. Wow!", - "start_offset": 0, - "end_offset": 19, - "type": "", - "position": 0 - } - ] -} --------------------------------------------------- - - -[[analysis-icu-normalization]] -==== ICU normalization token filter - -Normalizes characters as explained -https://unicode-org.github.io/icu/userguide/transforms/normalization/[here]. It registers -itself as the `icu_normalizer` token filter, which is available to all indices -without any further configuration. The type of normalization can be specified -with the `name` parameter, which accepts `nfc`, `nfkc`, and `nfkc_cf` -(default). - -Which letters are normalized can be controlled by specifying the -`unicode_set_filter` parameter, which accepts a -https://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.html[UnicodeSet]. - -You should probably prefer the <>. - -Here are two examples, the default usage and a customised token filter: - -[source,console] --------------------------------------------------- -PUT icu_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "nfkc_cf_normalized": { <1> - "tokenizer": "icu_tokenizer", - "filter": [ - "icu_normalizer" - ] - }, - "nfc_normalized": { <2> - "tokenizer": "icu_tokenizer", - "filter": [ - "nfc_normalizer" - ] - } - }, - "filter": { - "nfc_normalizer": { - "type": "icu_normalizer", - "name": "nfc" - } - } - } - } - } -} --------------------------------------------------- - -<1> Uses the default `nfkc_cf` normalization. -<2> Uses the customized `nfc_normalizer` token filter, which is set to use `nfc` normalization. - - -[[analysis-icu-folding]] -==== ICU folding token filter - -Case folding of Unicode characters based on `UTR#30`, like the -{ref}/analysis-asciifolding-tokenfilter.html[ASCII-folding token filter] -on steroids. It registers itself as the `icu_folding` token filter and is -available to all indices: - -[source,console] --------------------------------------------------- -PUT icu_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "folded": { - "tokenizer": "icu_tokenizer", - "filter": [ - "icu_folding" - ] - } - } - } - } - } -} --------------------------------------------------- - -The ICU folding token filter already does Unicode normalization, so there is -no need to use Normalize character or token filter as well. - -Which letters are folded can be controlled by specifying the -`unicode_set_filter` parameter, which accepts a -https://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.html[UnicodeSet]. - -The following example exempts Swedish characters from folding. 
It is important -to note that both upper and lowercase forms should be specified, and that -these filtered character are not lowercased which is why we add the -`lowercase` filter as well: - -[source,console] --------------------------------------------------- -PUT icu_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "swedish_analyzer": { - "tokenizer": "icu_tokenizer", - "filter": [ - "swedish_folding", - "lowercase" - ] - } - }, - "filter": { - "swedish_folding": { - "type": "icu_folding", - "unicode_set_filter": "[^åäöÅÄÖ]" - } - } - } - } - } -} --------------------------------------------------- - - -[[analysis-icu-collation]] -==== ICU collation token filter - -[WARNING] -====== -This token filter has been deprecated since Lucene 5.0. Please use -<>. -====== - -[[analysis-icu-collation-keyword-field]] -==== ICU collation keyword field - -Collations are used for sorting documents in a language-specific word order. -The `icu_collation_keyword` field type is available to all indices and will encode -the terms directly as bytes in a doc values field and a single indexed token just -like a standard {ref}/keyword.html[Keyword Field]. - -Defaults to using {defguide}/sorting-collations.html#uca[DUCET collation], -which is a best-effort attempt at language-neutral sorting. - -Below is an example of how to set up a field for sorting German names in -``phonebook'' order: - -[source,console] --------------------------- -PUT my-index-000001 -{ - "mappings": { - "properties": { - "name": { <1> - "type": "text", - "fields": { - "sort": { <2> - "type": "icu_collation_keyword", - "index": false, - "language": "de", - "country": "DE", - "variant": "@collation=phonebook" - } - } - } - } - } -} - -GET /my-index-000001/_search <3> -{ - "query": { - "match": { - "name": "Fritz" - } - }, - "sort": "name.sort" -} - --------------------------- - -<1> The `name` field uses the `standard` analyzer, and so supports full text queries. -<2> The `name.sort` field is an `icu_collation_keyword` field that will preserve the name as - a single token doc values, and applies the German ``phonebook'' order. -<3> An example query which searches the `name` field and sorts on the `name.sort` field. - -===== Parameters for ICU collation keyword fields - -The following parameters are accepted by `icu_collation_keyword` fields: - -[horizontal] - -`doc_values`:: - - Should the field be stored on disk in a column-stride fashion, so that it - can later be used for sorting, aggregations, or scripting? Accepts `true` - (default) or `false`. - -`index`:: - - Should the field be searchable? Accepts `true` (default) or `false`. - -`null_value`:: - - Accepts a string value which is substituted for any explicit `null` - values. Defaults to `null`, which means the field is treated as missing. - -{ref}/ignore-above.html[`ignore_above`]:: - - Strings longer than the `ignore_above` setting will be ignored. - Checking is performed on the original string before the collation. - The `ignore_above` setting can be updated on existing fields - using the {ref}/indices-put-mapping.html[PUT mapping API]. - By default, there is no limit and all values will be indexed. - -`store`:: - - Whether the field value should be stored and retrievable separately from - the {ref}/mapping-source-field.html[`_source`] field. Accepts `true` or `false` - (default). 
- -`fields`:: - - Multi-fields allow the same string value to be indexed in multiple ways for - different purposes, such as one field for search and a multi-field for - sorting and aggregations. - -===== Collation options - -`strength`:: - -The strength property determines the minimum level of difference considered -significant during comparison. Possible values are : `primary`, `secondary`, -`tertiary`, `quaternary` or `identical`. See the -https://icu-project.org/apiref/icu4j/com/ibm/icu/text/Collator.html[ICU Collation documentation] -for a more detailed explanation for each value. Defaults to `tertiary` -unless otherwise specified in the collation. - -`decomposition`:: - -Possible values: `no` (default, but collation-dependent) or `canonical`. -Setting this decomposition property to `canonical` allows the Collator to -handle unnormalized text properly, producing the same results as if the text -were normalized. If `no` is set, it is the user's responsibility to ensure -that all text is already in the appropriate form before a comparison or before -getting a CollationKey. Adjusting decomposition mode allows the user to select -between faster and more complete collation behavior. Since a great many of the -world's languages do not require text normalization, most locales set `no` as -the default decomposition mode. - -The following options are expert only: - -`alternate`:: - -Possible values: `shifted` or `non-ignorable`. Sets the alternate handling for -strength `quaternary` to be either shifted or non-ignorable. Which boils down -to ignoring punctuation and whitespace. - -`case_level`:: - -Possible values: `true` or `false` (default). Whether case level sorting is -required. When strength is set to `primary` this will ignore accent -differences. - - -`case_first`:: - -Possible values: `lower` or `upper`. Useful to control which case is sorted -first when the case is not ignored for strength `tertiary`. The default depends on -the collation. - -`numeric`:: - -Possible values: `true` or `false` (default) . Whether digits are sorted -according to their numeric representation. For example the value `egg-9` is -sorted before the value `egg-21`. - - -`variable_top`:: - -Single character or contraction. Controls what is variable for `alternate`. - -`hiragana_quaternary_mode`:: - -Possible values: `true` or `false`. Distinguishing between Katakana and -Hiragana characters in `quaternary` strength. - - -[[analysis-icu-transform]] -==== ICU transform token filter - -Transforms are used to process Unicode text in many different ways, such as -case mapping, normalization, transliteration and bidirectional text handling. - -You can define which transformation you want to apply with the `id` parameter -(defaults to `Null`), and specify text direction with the `dir` parameter -which accepts `forward` (default) for LTR and `reverse` for RTL. Custom -rulesets are not yet supported. 
- -For example: - -[source,console] --------------------------------------------------- -PUT icu_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "latin": { - "tokenizer": "keyword", - "filter": [ - "myLatinTransform" - ] - } - }, - "filter": { - "myLatinTransform": { - "type": "icu_transform", - "id": "Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC" <1> - } - } - } - } - } -} - -GET icu_sample/_analyze -{ - "analyzer": "latin", - "text": "你好" <2> -} - -GET icu_sample/_analyze -{ - "analyzer": "latin", - "text": "здравствуйте" <3> -} - -GET icu_sample/_analyze -{ - "analyzer": "latin", - "text": "こんにちは" <4> -} - --------------------------------------------------- - -<1> This transforms transliterates characters to Latin, and separates accents - from their base characters, removes the accents, and then puts the - remaining text into an unaccented form. - -<2> Returns `ni hao`. -<3> Returns `zdravstvujte`. -<4> Returns `kon'nichiha`. - -For more documentation, Please see the https://unicode-org.github.io/icu/userguide/transforms/[user guide of ICU Transform]. diff --git a/docs/plugins/analysis-kuromoji.asciidoc b/docs/plugins/analysis-kuromoji.asciidoc deleted file mode 100644 index 217d88f361223..0000000000000 --- a/docs/plugins/analysis-kuromoji.asciidoc +++ /dev/null @@ -1,788 +0,0 @@ -[[analysis-kuromoji]] -=== Japanese (kuromoji) analysis plugin - -The Japanese (kuromoji) analysis plugin integrates Lucene kuromoji analysis -module into {es}. - -:plugin_name: analysis-kuromoji -include::install_remove.asciidoc[] - -[[analysis-kuromoji-analyzer]] -==== `kuromoji` analyzer - -The `kuromoji` analyzer uses the following analysis chain: - -* `CJKWidthCharFilter` from Lucene -* <> -* <> token filter -* <> token filter -* <> token filter -* <> token filter -* {ref}/analysis-lowercase-tokenfilter.html[`lowercase`] token filter - -It supports the `mode` and `user_dictionary` settings from -<>. - -[discrete] -[[kuromoji-analyzer-normalize-full-width-characters]] -==== Normalize full-width characters - -The `kuromoji_tokenizer` tokenizer uses characters from the MeCab-IPADIC -dictionary to split text into tokens. The dictionary includes some full-width -characters, such as `o` and `f`. If a text contains full-width characters, -the tokenizer can produce unexpected tokens. - -For example, the `kuromoji_tokenizer` tokenizer converts the text -`Culture of Japan` to the tokens `[ culture, o, f, japan ]` -instead of `[ culture, of, japan ]`. - -To avoid this, add the <> to a custom analyzer based on the `kuromoji` analyzer. The -`icu_normalizer` character filter converts full-width characters to their normal -equivalents. - -First, duplicate the `kuromoji` analyzer to create the basis for a custom -analyzer. Then add the `icu_normalizer` character filter to the custom analyzer. -For example: - -[source,console] ----- -PUT index-00001 -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "kuromoji_normalize": { <1> - "char_filter": [ - "icu_normalizer" <2> - ], - "tokenizer": "kuromoji_tokenizer", - "filter": [ - "kuromoji_baseform", - "kuromoji_part_of_speech", - "cjk_width", - "ja_stop", - "kuromoji_stemmer", - "lowercase" - ] - } - } - } - } - } -} ----- -<1> Creates a new custom analyzer, `kuromoji_normalize`, based on the `kuromoji` -analyzer. -<2> Adds the `icu_normalizer` character filter to the analyzer. 
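-
-To confirm the effect of the added character filter, the custom analyzer can
-be exercised with the `_analyze` API. The full-width sample text below is only
-illustrative:
-
-[source,console]
----------------------------------------------------
-GET index-00001/_analyze
-{
-  "analyzer": "kuromoji_normalize",
-  "text": "Ｃｕｌｔｕｒｅ ｏｆ Ｊａｐａｎ" <1>
-}
----------------------------------------------------
-
-<1> Full-width input; with the `icu_normalizer` character filter in place it
-    should be tokenized as `[ culture, of, japan ]` rather than splitting the
-    full-width letters.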
- - -[[analysis-kuromoji-charfilter]] -==== `kuromoji_iteration_mark` character filter - -The `kuromoji_iteration_mark` normalizes Japanese horizontal iteration marks -(_odoriji_) to their expanded form. It accepts the following settings: - -`normalize_kanji`:: - - Indicates whether kanji iteration marks should be normalized. Defaults to `true`. - -`normalize_kana`:: - - Indicates whether kana iteration marks should be normalized. Defaults to `true` - - -[[analysis-kuromoji-tokenizer]] -==== `kuromoji_tokenizer` - -The `kuromoji_tokenizer` accepts the following settings: - -`mode`:: -+ --- - -The tokenization mode determines how the tokenizer handles compound and -unknown words. It can be set to: - -`normal`:: - - Normal segmentation, no decomposition for compounds. Example output: - - 関西国際空港 - アブラカダブラ - -`search`:: - - Segmentation geared towards search. This includes a decompounding process - for long nouns, also including the full compound token as a synonym. - Example output: - - 関西, 関西国際空港, 国際, 空港 - アブラカダブラ - -`extended`:: - - Extended mode outputs unigrams for unknown words. Example output: - - 関西, 関西国際空港, 国際, 空港 - ア, ブ, ラ, カ, ダ, ブ, ラ --- - -`discard_punctuation`:: - - Whether punctuation should be discarded from the output. Defaults to `true`. - -`lenient`:: - - Whether the `user_dictionary` should be deduplicated on the provided `text`. - False by default causing duplicates to generate an error. - -`user_dictionary`:: -+ --- -The Kuromoji tokenizer uses the MeCab-IPADIC dictionary by default. A `user_dictionary` -may be appended to the default dictionary. The dictionary should have the following CSV format: - -[source,csv] ------------------------ -, ... , ... , ------------------------ --- - -As a demonstration of how the user dictionary can be used, save the following -dictionary to `$ES_HOME/config/userdict_ja.txt`: - -[source,csv] ------------------------ -東京スカイツリー,東京 スカイツリー,トウキョウ スカイツリー,カスタム名詞 ------------------------ - --- - -You can also inline the rules directly in the tokenizer definition using -the `user_dictionary_rules` option: - -[source,console] --------------------------------------------------- -PUT kuromoji_sample -{ - "settings": { - "index": { - "analysis": { - "tokenizer": { - "kuromoji_user_dict": { - "type": "kuromoji_tokenizer", - "mode": "extended", - "user_dictionary_rules": ["東京スカイツリー,東京 スカイツリー,トウキョウ スカイツリー,カスタム名詞"] - } - }, - "analyzer": { - "my_analyzer": { - "type": "custom", - "tokenizer": "kuromoji_user_dict" - } - } - } - } - } -} --------------------------------------------------- --- - -`nbest_cost`/`nbest_examples`:: -+ --- -Additional expert user parameters `nbest_cost` and `nbest_examples` can be used -to include additional tokens that are most likely according to the statistical model. -If both parameters are used, the largest number of both is applied. - -`nbest_cost`:: - - The `nbest_cost` parameter specifies an additional Viterbi cost. - The KuromojiTokenizer will include all tokens in Viterbi paths that are - within the nbest_cost value of the best path. - -`nbest_examples`:: - - The `nbest_examples` can be used to find a `nbest_cost` value based on examples. - For example, a value of /箱根山-箱根/成田空港-成田/ indicates that in the texts, - 箱根山 (Mt. Hakone) and 成田空港 (Narita Airport) we'd like a cost that gives is us - 箱根 (Hakone) and 成田 (Narita). 
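-
-As an illustration, either parameter is set directly on a `kuromoji_tokenizer`
-definition; the index, tokenizer, and analyzer names below are placeholders:
-
-[source,console]
----------------------------------------------------
-PUT kuromoji_sample
-{
-  "settings": {
-    "index": {
-      "analysis": {
-        "tokenizer": {
-          "kuromoji_nbest": {
-            "type": "kuromoji_tokenizer",
-            "nbest_examples": "/箱根山-箱根/成田空港-成田/" <1>
-          }
-        },
-        "analyzer": {
-          "my_nbest_analyzer": {
-            "type": "custom",
-            "tokenizer": "kuromoji_nbest"
-          }
-        }
-      }
-    }
-  }
-}
----------------------------------------------------
-
-<1> Alternatively, a numeric `nbest_cost` value can be set instead of (or
-    together with) `nbest_examples`; when both are given, the larger resulting
-    cost is applied.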
--- - - -Then create an analyzer as follows: - -[source,console] --------------------------------------------------- -PUT kuromoji_sample -{ - "settings": { - "index": { - "analysis": { - "tokenizer": { - "kuromoji_user_dict": { - "type": "kuromoji_tokenizer", - "mode": "extended", - "discard_punctuation": "false", - "user_dictionary": "userdict_ja.txt", - "lenient": "true" - } - }, - "analyzer": { - "my_analyzer": { - "type": "custom", - "tokenizer": "kuromoji_user_dict" - } - } - } - } - } -} - -GET kuromoji_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "東京スカイツリー" -} --------------------------------------------------- - -The above `analyze` request returns the following: - -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ { - "token" : "東京", - "start_offset" : 0, - "end_offset" : 2, - "type" : "word", - "position" : 0 - }, { - "token" : "スカイツリー", - "start_offset" : 2, - "end_offset" : 8, - "type" : "word", - "position" : 1 - } ] -} --------------------------------------------------- - -`discard_compound_token`:: - Whether original compound tokens should be discarded from the output with `search` mode. Defaults to `false`. - Example output with `search` or `extended` mode and this option `true`: - - 関西, 国際, 空港 - -NOTE: If a text contains full-width characters, the `kuromoji_tokenizer` -tokenizer can produce unexpected tokens. To avoid this, add the -<> to -your analyzer. See <>. - - -[[analysis-kuromoji-baseform]] -==== `kuromoji_baseform` token filter - -The `kuromoji_baseform` token filter replaces terms with their -BaseFormAttribute. This acts as a lemmatizer for verbs and adjectives. Example: - -[source,console] --------------------------------------------------- -PUT kuromoji_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "kuromoji_tokenizer", - "filter": [ - "kuromoji_baseform" - ] - } - } - } - } - } -} - -GET kuromoji_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "飲み" -} --------------------------------------------------- - -which responds with: - -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ { - "token" : "飲む", - "start_offset" : 0, - "end_offset" : 2, - "type" : "word", - "position" : 0 - } ] -} --------------------------------------------------- - - -[[analysis-kuromoji-speech]] -==== `kuromoji_part_of_speech` token filter - -The `kuromoji_part_of_speech` token filter removes tokens that match a set of -part-of-speech tags. It accepts the following setting: - -`stoptags`:: - - An array of part-of-speech tags that should be removed. It defaults to the - `stoptags.txt` file embedded in the `lucene-analyzer-kuromoji.jar`. 
- -For example: - -[source,console] --------------------------------------------------- -PUT kuromoji_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "kuromoji_tokenizer", - "filter": [ - "my_posfilter" - ] - } - }, - "filter": { - "my_posfilter": { - "type": "kuromoji_part_of_speech", - "stoptags": [ - "助詞-格助詞-一般", - "助詞-終助詞" - ] - } - } - } - } - } -} - -GET kuromoji_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "寿司がおいしいね" -} --------------------------------------------------- - -Which responds with: - -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ { - "token" : "寿司", - "start_offset" : 0, - "end_offset" : 2, - "type" : "word", - "position" : 0 - }, { - "token" : "おいしい", - "start_offset" : 3, - "end_offset" : 7, - "type" : "word", - "position" : 2 - } ] -} --------------------------------------------------- - - -[[analysis-kuromoji-readingform]] -==== `kuromoji_readingform` token filter - -The `kuromoji_readingform` token filter replaces the token with its reading -form in either katakana or romaji. It accepts the following setting: - -`use_romaji`:: - - Whether romaji reading form should be output instead of katakana. Defaults to `false`. - -When using the pre-defined `kuromoji_readingform` filter, `use_romaji` is set -to `true`. The default when defining a custom `kuromoji_readingform`, however, -is `false`. The only reason to use the custom form is if you need the -katakana reading form: - -[source,console] --------------------------------------------------- -PUT kuromoji_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "romaji_analyzer": { - "tokenizer": "kuromoji_tokenizer", - "filter": [ "romaji_readingform" ] - }, - "katakana_analyzer": { - "tokenizer": "kuromoji_tokenizer", - "filter": [ "katakana_readingform" ] - } - }, - "filter": { - "romaji_readingform": { - "type": "kuromoji_readingform", - "use_romaji": true - }, - "katakana_readingform": { - "type": "kuromoji_readingform", - "use_romaji": false - } - } - } - } - } -} - -GET kuromoji_sample/_analyze -{ - "analyzer": "katakana_analyzer", - "text": "寿司" <1> -} - -GET kuromoji_sample/_analyze -{ - "analyzer": "romaji_analyzer", - "text": "寿司" <2> -} --------------------------------------------------- - -<1> Returns `スシ`. -<2> Returns `sushi`. - -[[analysis-kuromoji-stemmer]] -==== `kuromoji_stemmer` token filter - -The `kuromoji_stemmer` token filter normalizes common katakana spelling -variations ending in a long sound character by removing this character -(U+30FC). Only full-width katakana characters are supported. - -This token filter accepts the following setting: - -`minimum_length`:: - - Katakana words shorter than the `minimum length` are not stemmed (default - is `4`). - - -[source,console] --------------------------------------------------- -PUT kuromoji_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "kuromoji_tokenizer", - "filter": [ - "my_katakana_stemmer" - ] - } - }, - "filter": { - "my_katakana_stemmer": { - "type": "kuromoji_stemmer", - "minimum_length": 4 - } - } - } - } - } -} - -GET kuromoji_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "コピー" <1> -} - -GET kuromoji_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "サーバー" <2> -} --------------------------------------------------- - -<1> Returns `コピー`. -<2> Return `サーバ`. 
- - -[[analysis-kuromoji-stop]] -==== `ja_stop` token filter - -The `ja_stop` token filter filters out Japanese stopwords (`_japanese_`), and -any other custom stopwords specified by the user. This filter only supports -the predefined `_japanese_` stopwords list. If you want to use a different -predefined list, then use the -{ref}/analysis-stop-tokenfilter.html[`stop` token filter] instead. - -[source,console] --------------------------------------------------- -PUT kuromoji_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "analyzer_with_ja_stop": { - "tokenizer": "kuromoji_tokenizer", - "filter": [ - "ja_stop" - ] - } - }, - "filter": { - "ja_stop": { - "type": "ja_stop", - "stopwords": [ - "_japanese_", - "ストップ" - ] - } - } - } - } - } -} - -GET kuromoji_sample/_analyze -{ - "analyzer": "analyzer_with_ja_stop", - "text": "ストップは消える" -} --------------------------------------------------- - -The above request returns: - -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ { - "token" : "消える", - "start_offset" : 5, - "end_offset" : 8, - "type" : "word", - "position" : 2 - } ] -} --------------------------------------------------- - - -[[analysis-kuromoji-number]] -==== `kuromoji_number` token filter - -The `kuromoji_number` token filter normalizes Japanese numbers (kansūji) -to regular Arabic decimal numbers in half-width characters. For example: - -[source,console] --------------------------------------------------- -PUT kuromoji_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "kuromoji_tokenizer", - "filter": [ - "kuromoji_number" - ] - } - } - } - } - } -} - -GET kuromoji_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "一〇〇〇" -} --------------------------------------------------- - -Which results in: - -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ { - "token" : "1000", - "start_offset" : 0, - "end_offset" : 4, - "type" : "word", - "position" : 0 - } ] -} --------------------------------------------------- - -[[analysis-kuromoji-hiragana-uppercase]] -==== `hiragana_uppercase` token filter - -The `hiragana_uppercase` token filter normalizes small letters (捨て仮名) in hiragana into standard letters. -This filter is useful if you want to search against old style Japanese text such as -patents, legal documents, contract policies, etc. - -For example: - -[source,console] --------------------------------------------------- -PUT kuromoji_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "kuromoji_tokenizer", - "filter": [ - "hiragana_uppercase" - ] - } - } - } - } - } -} - -GET kuromoji_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "ちょっとまって" -} --------------------------------------------------- - -Which results in: - -[source,console-result] --------------------------------------------------- -{ - "tokens": [ - { - "token": "ちよつと", - "start_offset": 0, - "end_offset": 4, - "type": "word", - "position": 0 - }, - { - "token": "まつ", - "start_offset": 4, - "end_offset": 6, - "type": "word", - "position": 1 - }, - { - "token": "て", - "start_offset": 6, - "end_offset": 7, - "type": "word", - "position": 2 - } - ] -} --------------------------------------------------- - -[[analysis-kuromoji-katakana-uppercase]] -==== `katakana_uppercase` token filter - -The `katakana_uppercase` token filter normalizes small letters (捨て仮名) in katakana into standard letters. 
-This filter is useful if you want to search against old style Japanese text such as -patents, legal documents, contract policies, etc. - -For example: - -[source,console] --------------------------------------------------- -PUT kuromoji_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "kuromoji_tokenizer", - "filter": [ - "katakana_uppercase" - ] - } - } - } - } - } -} - -GET kuromoji_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "ストップウォッチ" -} --------------------------------------------------- - -Which results in: - -[source,console-result] --------------------------------------------------- -{ - "tokens": [ - { - "token": "ストツプウオツチ", - "start_offset": 0, - "end_offset": 8, - "type": "word", - "position": 0 - } - ] -} --------------------------------------------------- - -[[analysis-kuromoji-completion]] -==== `kuromoji_completion` token filter - -The `kuromoji_completion` token filter adds Japanese romanized tokens to the term attributes along with the original tokens (surface forms). - -[source,console] --------------------------------------------------- -GET _analyze -{ - "analyzer": "kuromoji_completion", - "text": "寿司" <1> -} --------------------------------------------------- - -<1> Returns `寿司`, `susi` (Kunrei-shiki) and `sushi` (Hepburn-shiki). - -The `kuromoji_completion` token filter accepts the following settings: - -`mode`:: -+ --- - -The tokenization mode determines how the tokenizer handles compound and -unknown words. It can be set to: - -`index`:: - - Simple romanization. Expected to be used when indexing. - -`query`:: - - Input Method aware romanization. Expected to be used when querying. - -Defaults to `index`. --- diff --git a/docs/plugins/analysis-nori.asciidoc b/docs/plugins/analysis-nori.asciidoc deleted file mode 100644 index 9eb3bf07fbd30..0000000000000 --- a/docs/plugins/analysis-nori.asciidoc +++ /dev/null @@ -1,550 +0,0 @@ -[[analysis-nori]] -=== Korean (nori) analysis plugin - -The Korean (nori) Analysis plugin integrates Lucene nori analysis -module into elasticsearch. It uses the https://bitbucket.org/eunjeon/mecab-ko-dic[mecab-ko-dic dictionary] -to perform morphological analysis of Korean texts. - -:plugin_name: analysis-nori -include::install_remove.asciidoc[] - -[[analysis-nori-analyzer]] -==== `nori` analyzer - -The `nori` analyzer consists of the following tokenizer and token filters: - -* <> -* <> token filter -* <> token filter -* {ref}/analysis-lowercase-tokenfilter.html[`lowercase`] token filter - -It supports the `decompound_mode` and `user_dictionary` settings from -<> and the `stoptags` setting from -<>. - -[[analysis-nori-tokenizer]] -==== `nori_tokenizer` - -The `nori_tokenizer` accepts the following settings: - -`decompound_mode`:: -+ --- - -The decompound mode determines how the tokenizer handles compound tokens. -It can be set to: - -`none`:: - - No decomposition for compounds. Example output: - - 가거도항 - 가곡역 - -`discard`:: - - Decomposes compounds and discards the original form (*default*). Example output: - - 가곡역 => 가곡, 역 - -`mixed`:: - - Decomposes compounds and keeps the original form. Example output: - - 가곡역 => 가곡역, 가곡, 역 --- - -`discard_punctuation`:: - - Whether punctuation should be discarded from the output. Defaults to `true`. - -`lenient`:: - - Whether the `user_dictionary` should be deduplicated on the provided `text`. - False by default causing duplicates to generate an error. 
- -`user_dictionary`:: -+ --- -The Nori tokenizer uses the https://bitbucket.org/eunjeon/mecab-ko-dic[mecab-ko-dic dictionary] by default. -A `user_dictionary` with custom nouns (`NNG`) may be appended to the default dictionary. -The dictionary should have the following format: - -[source,txt] ------------------------ - [ ... ] ------------------------ - -The first token is mandatory and represents the custom noun that should be added in -the dictionary. For compound nouns the custom segmentation can be provided -after the first token (`[ ... ]`). The segmentation of the -custom compound nouns is controlled by the `decompound_mode` setting. - - -As a demonstration of how the user dictionary can be used, save the following -dictionary to `$ES_HOME/config/userdict_ko.txt`: - -[source,txt] ------------------------ -c++ <1> -C쁠쁠 -세종 -세종시 세종 시 <2> ------------------------ - -<1> A simple noun -<2> A compound noun (`세종시`) followed by its decomposition: `세종` and `시`. - -Then create an analyzer as follows: - -[source,console] --------------------------------------------------- -PUT nori_sample -{ - "settings": { - "index": { - "analysis": { - "tokenizer": { - "nori_user_dict": { - "type": "nori_tokenizer", - "decompound_mode": "mixed", - "discard_punctuation": "false", - "user_dictionary": "userdict_ko.txt", - "lenient": "true" - } - }, - "analyzer": { - "my_analyzer": { - "type": "custom", - "tokenizer": "nori_user_dict" - } - } - } - } - } -} - -GET nori_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "세종시" <1> -} --------------------------------------------------- - -<1> Sejong city - -The above `analyze` request returns the following: - -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ { - "token" : "세종시", - "start_offset" : 0, - "end_offset" : 3, - "type" : "word", - "position" : 0, - "positionLength" : 2 <1> - }, { - "token" : "세종", - "start_offset" : 0, - "end_offset" : 2, - "type" : "word", - "position" : 0 - }, { - "token" : "시", - "start_offset" : 2, - "end_offset" : 3, - "type" : "word", - "position" : 1 - }] -} --------------------------------------------------- - -<1> This is a compound token that spans two positions (`mixed` mode). --- - -`user_dictionary_rules`:: -+ --- - -You can also inline the rules directly in the tokenizer definition using -the `user_dictionary_rules` option: - -[source,console] --------------------------------------------------- -PUT nori_sample -{ - "settings": { - "index": { - "analysis": { - "tokenizer": { - "nori_user_dict": { - "type": "nori_tokenizer", - "decompound_mode": "mixed", - "user_dictionary_rules": ["c++", "C쁠쁠", "세종", "세종시 세종 시"] - } - }, - "analyzer": { - "my_analyzer": { - "type": "custom", - "tokenizer": "nori_user_dict" - } - } - } - } - } -} --------------------------------------------------- --- - -The `nori_tokenizer` sets a number of additional attributes per token that are used by token filters -to modify the stream. 
-You can view all these additional attributes with the following request: - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer": "nori_tokenizer", - "text": "뿌리가 깊은 나무는", <1> - "attributes" : ["posType", "leftPOS", "rightPOS", "morphemes", "reading"], - "explain": true -} --------------------------------------------------- - -<1> A tree with deep roots - -Which responds with: - -[source,console-result] --------------------------------------------------- -{ - "detail": { - "custom_analyzer": true, - "charfilters": [], - "tokenizer": { - "name": "nori_tokenizer", - "tokens": [ - { - "token": "뿌리", - "start_offset": 0, - "end_offset": 2, - "type": "word", - "position": 0, - "leftPOS": "NNG(General Noun)", - "morphemes": null, - "posType": "MORPHEME", - "reading": null, - "rightPOS": "NNG(General Noun)" - }, - { - "token": "가", - "start_offset": 2, - "end_offset": 3, - "type": "word", - "position": 1, - "leftPOS": "JKS(Subject case marker)", - "morphemes": null, - "posType": "MORPHEME", - "reading": null, - "rightPOS": "JKS(Subject case marker)" - }, - { - "token": "깊", - "start_offset": 4, - "end_offset": 5, - "type": "word", - "position": 2, - "leftPOS": "VA(Adjective)", - "morphemes": null, - "posType": "MORPHEME", - "reading": null, - "rightPOS": "VA(Adjective)" - }, - { - "token": "은", - "start_offset": 5, - "end_offset": 6, - "type": "word", - "position": 3, - "leftPOS": "ETM(Adnominal form transformative ending)", - "morphemes": null, - "posType": "MORPHEME", - "reading": null, - "rightPOS": "ETM(Adnominal form transformative ending)" - }, - { - "token": "나무", - "start_offset": 7, - "end_offset": 9, - "type": "word", - "position": 4, - "leftPOS": "NNG(General Noun)", - "morphemes": null, - "posType": "MORPHEME", - "reading": null, - "rightPOS": "NNG(General Noun)" - }, - { - "token": "는", - "start_offset": 9, - "end_offset": 10, - "type": "word", - "position": 5, - "leftPOS": "JX(Auxiliary postpositional particle)", - "morphemes": null, - "posType": "MORPHEME", - "reading": null, - "rightPOS": "JX(Auxiliary postpositional particle)" - } - ] - }, - "tokenfilters": [] - } -} --------------------------------------------------- - -[[analysis-nori-speech]] -==== `nori_part_of_speech` token filter - -The `nori_part_of_speech` token filter removes tokens that match a set of -part-of-speech tags. The list of supported tags and their meanings can be found here: -{lucene-core-javadoc}/../analysis/nori/org/apache/lucene/analysis/ko/POS.Tag.html[Part of speech tags] - -It accepts the following setting: - -`stoptags`:: - - An array of part-of-speech tags that should be removed. 
- -and defaults to: - -[source,js] --------------------------------------------------- -"stoptags": [ - "E", - "IC", - "J", - "MAG", "MAJ", "MM", - "SP", "SSC", "SSO", "SC", "SE", - "XPN", "XSA", "XSN", "XSV", - "UNA", "NA", "VSV" -] --------------------------------------------------- -// NOTCONSOLE - -For example: - -[source,console] --------------------------------------------------- -PUT nori_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "nori_tokenizer", - "filter": [ - "my_posfilter" - ] - } - }, - "filter": { - "my_posfilter": { - "type": "nori_part_of_speech", - "stoptags": [ - "NR" <1> - ] - } - } - } - } - } -} - -GET nori_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "여섯 용이" <2> -} --------------------------------------------------- - -<1> Korean numerals should be removed (`NR`) -<2> Six dragons - -Which responds with: - -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ { - "token" : "용", - "start_offset" : 3, - "end_offset" : 4, - "type" : "word", - "position" : 1 - }, { - "token" : "이", - "start_offset" : 4, - "end_offset" : 5, - "type" : "word", - "position" : 2 - } ] -} --------------------------------------------------- - - -[[analysis-nori-readingform]] -==== `nori_readingform` token filter - -The `nori_readingform` token filter rewrites tokens written in Hanja to their Hangul form. - -[source,console] --------------------------------------------------- -PUT nori_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "nori_tokenizer", - "filter": [ "nori_readingform" ] - } - } - } - } - } -} - -GET nori_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "鄕歌" <1> -} --------------------------------------------------- - -<1> A token written in Hanja: Hyangga - -Which responds with: - -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ { - "token" : "향가", <1> - "start_offset" : 0, - "end_offset" : 2, - "type" : "word", - "position" : 0 - }] -} --------------------------------------------------- - -<1> The Hanja form is replaced by the Hangul translation. - - -[[analysis-nori-number]] -==== `nori_number` token filter - -The `nori_number` token filter normalizes Korean numbers -to regular Arabic decimal numbers in half-width characters. - -Korean numbers are often written using a combination of Hangul and Arabic numbers with various kinds of punctuation. -For example, 3.2천 means 3200. -This filter does this kind of normalization and allows a search for 3200 to match 3.2천 in text, -but can also be used to make range facets based on the normalized numbers and so on. - -[NOTE] -==== -Notice that this analyzer uses a token composition scheme and relies on punctuation tokens -being found in the token stream. -Please make sure your `nori_tokenizer` has `discard_punctuation` set to false. -In case punctuation characters, such as U+FF0E(.), is removed from the token stream, -this filter would find input tokens 3 and 2천 and give outputs 3 and 2000 instead of 3200, -which is likely not the intended result. - -If you want to remove punctuation characters from your index that are not part of normalized numbers, -add a `stop` token filter with the punctuation you wish to remove after `nori_number` in your analyzer chain. -==== -Below are some examples of normalizations this filter supports. 
-The input is untokenized text and the result is the single term attribute emitted for the input. - -- 영영칠 -> 7 -- 일영영영 -> 1000 -- 삼천2백2십삼 -> 3223 -- 일조육백만오천일 -> 1000006005001 -- 3.2천 -> 3200 -- 1.2만345.67 -> 12345.67 -- 4,647.100 -> 4647.1 -- 15,7 -> 157 (be aware of this weakness) - -For example: - -[source,console] --------------------------------------------------- -PUT nori_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "tokenizer_discard_puncuation_false", - "filter": [ - "part_of_speech_stop_sp", "nori_number" - ] - } - }, - "tokenizer": { - "tokenizer_discard_puncuation_false": { - "type": "nori_tokenizer", - "discard_punctuation": "false" - } - }, - "filter": { - "part_of_speech_stop_sp": { - "type": "nori_part_of_speech", - "stoptags": ["SP"] - } - } - } - } - } -} - -GET nori_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "십만이천오백과 3.2천" -} --------------------------------------------------- - -Which results in: - -[source,console-result] --------------------------------------------------- -{ - "tokens" : [{ - "token" : "102500", - "start_offset" : 0, - "end_offset" : 6, - "type" : "word", - "position" : 0 - }, { - "token" : "과", - "start_offset" : 6, - "end_offset" : 7, - "type" : "word", - "position" : 1 - }, { - "token" : "3200", - "start_offset" : 8, - "end_offset" : 12, - "type" : "word", - "position" : 2 - }] -} --------------------------------------------------- diff --git a/docs/plugins/analysis-phonetic.asciidoc b/docs/plugins/analysis-phonetic.asciidoc deleted file mode 100644 index dfbb8adb90bc6..0000000000000 --- a/docs/plugins/analysis-phonetic.asciidoc +++ /dev/null @@ -1,105 +0,0 @@ -[[analysis-phonetic]] -=== Phonetic analysis plugin - -The Phonetic Analysis plugin provides token filters which convert tokens to -their phonetic representation using Soundex, Metaphone, and a variety of other -algorithms. - -:plugin_name: analysis-phonetic -include::install_remove.asciidoc[] - - -[[analysis-phonetic-token-filter]] -==== `phonetic` token filter - -The `phonetic` token filter takes the following settings: - -`encoder`:: - - Which phonetic encoder to use. Accepts `metaphone` (default), - `double_metaphone`, `soundex`, `refined_soundex`, `caverphone1`, - `caverphone2`, `cologne`, `nysiis`, `koelnerphonetik`, `haasephonetik`, - `beider_morse`, `daitch_mokotoff`. - -`replace`:: - - Whether or not the original token should be replaced by the phonetic - token. Accepts `true` (default) and `false`. Not supported by - `beider_morse` encoding. - -[source,console] --------------------------------------------------- -PUT phonetic_sample -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "my_metaphone" - ] - } - }, - "filter": { - "my_metaphone": { - "type": "phonetic", - "encoder": "metaphone", - "replace": false - } - } - } - } - } -} - -GET phonetic_sample/_analyze -{ - "analyzer": "my_analyzer", - "text": "Joe Bloggs" <1> -} --------------------------------------------------- - -<1> Returns: `J`, `joe`, `BLKS`, `bloggs` - -It is important to note that `"replace": false` can lead to unexpected behavior since -the original and the phonetically analyzed version are both kept at the same token position. -Some queries handle these stacked tokens in special ways. For example, the fuzzy `match` -query does not apply {ref}/common-options.html#fuzziness[fuzziness] to stacked synonym tokens. 
-This can lead to issues that are difficult to diagnose and reason about. For this reason, it -is often beneficial to use separate fields for analysis with and without phonetic filtering. -That way searches can be run against both fields with differing boosts and trade-offs (e.g. -only run a fuzzy `match` query on the original text field, but not on the phonetic version). - -[discrete] -===== Double metaphone settings - -If the `double_metaphone` encoder is used, then this additional setting is -supported: - -`max_code_len`:: - - The maximum length of the emitted metaphone token. Defaults to `4`. - -[discrete] -===== Beider Morse settings - -If the `beider_morse` encoder is used, then these additional settings are -supported: - -`rule_type`:: - - Whether matching should be `exact` or `approx` (default). - -`name_type`:: - - Whether names are `ashkenazi`, `sephardic`, or `generic` (default). - -`languageset`:: - - An array of languages to check. If not specified, then the language will - be guessed. Accepts: `any`, `common`, `cyrillic`, `english`, `french`, - `german`, `hebrew`, `hungarian`, `polish`, `romanian`, `russian`, - `spanish`. diff --git a/docs/plugins/analysis-smartcn.asciidoc b/docs/plugins/analysis-smartcn.asciidoc deleted file mode 100644 index a0cb479b4dcb7..0000000000000 --- a/docs/plugins/analysis-smartcn.asciidoc +++ /dev/null @@ -1,428 +0,0 @@ -[[analysis-smartcn]] -=== Smart Chinese analysis plugin - -The Smart Chinese Analysis plugin integrates Lucene's Smart Chinese analysis -module into elasticsearch. - -It provides an analyzer for Chinese or mixed Chinese-English text. This -analyzer uses probabilistic knowledge to find the optimal word segmentation -for Simplified Chinese text. The text is first broken into sentences, then -each sentence is segmented into words. - -:plugin_name: analysis-smartcn -include::install_remove.asciidoc[] - - -[[analysis-smartcn-tokenizer]] -[discrete] -==== `smartcn` tokenizer and token filter - -The plugin provides the `smartcn` analyzer, `smartcn_tokenizer` tokenizer, and -`smartcn_stop` token filter which are not configurable. - -NOTE: The `smartcn_word` token filter and `smartcn_sentence` have been deprecated. - -==== Reimplementing and extending the analyzers - -The `smartcn` analyzer could be reimplemented as a `custom` analyzer that can -then be extended and configured as follows: - -[source,console] ----------------------------------------------------- -PUT smartcn_example -{ - "settings": { - "analysis": { - "analyzer": { - "rebuilt_smartcn": { - "tokenizer": "smartcn_tokenizer", - "filter": [ - "porter_stem", - "smartcn_stop" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: smartcn_example, first: smartcn, second: rebuilt_smartcn}\nendyaml\n/] - -[[analysis-smartcn_stop]] -==== `smartcn_stop` token filter - -The `smartcn_stop` token filter filters out stopwords defined by `smartcn` -analyzer (`_smartcn_`), and any other custom stopwords specified by the user. -This filter only supports the predefined `_smartcn_` stopwords list. -If you want to use a different predefined list, then use the -{ref}/analysis-stop-tokenfilter.html[`stop` token filter] instead. 
- -[source,console] --------------------------------------------------- -PUT smartcn_example -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "smartcn_with_stop": { - "tokenizer": "smartcn_tokenizer", - "filter": [ - "porter_stem", - "my_smartcn_stop" - ] - } - }, - "filter": { - "my_smartcn_stop": { - "type": "smartcn_stop", - "stopwords": [ - "_smartcn_", - "stack", - "的" - ] - } - } - } - } - } -} - -GET smartcn_example/_analyze -{ - "analyzer": "smartcn_with_stop", - "text": "哈喽,我们是 Elastic 我们是 Elastic Stack(Elasticsearch、Kibana、Beats 和 Logstash)的开发公司。从股票行情到 Twitter 消息流,从 Apache 日志到 WordPress 博文,我们可以帮助人们体验搜索的强大力量,帮助他们以截然不同的方式探索和分析数据" -} --------------------------------------------------- - -The above request returns: - -[source,console-result] --------------------------------------------------- -{ - "tokens": [ - { - "token": "哈", - "start_offset": 0, - "end_offset": 1, - "type": "word", - "position": 0 - }, - { - "token": "喽", - "start_offset": 1, - "end_offset": 2, - "type": "word", - "position": 1 - }, - { - "token": "我们", - "start_offset": 3, - "end_offset": 5, - "type": "word", - "position": 3 - }, - { - "token": "是", - "start_offset": 5, - "end_offset": 6, - "type": "word", - "position": 4 - }, - { - "token": "elast", - "start_offset": 7, - "end_offset": 14, - "type": "word", - "position": 5 - }, - { - "token": "我们", - "start_offset": 17, - "end_offset": 19, - "type": "word", - "position": 6 - }, - { - "token": "是", - "start_offset": 19, - "end_offset": 20, - "type": "word", - "position": 7 - }, - { - "token": "elast", - "start_offset": 21, - "end_offset": 28, - "type": "word", - "position": 8 - }, - { - "token": "elasticsearch", - "start_offset": 35, - "end_offset": 48, - "type": "word", - "position": 11 - }, - { - "token": "kibana", - "start_offset": 49, - "end_offset": 55, - "type": "word", - "position": 13 - }, - { - "token": "beat", - "start_offset": 56, - "end_offset": 61, - "type": "word", - "position": 15 - }, - { - "token": "和", - "start_offset": 62, - "end_offset": 63, - "type": "word", - "position": 16 - }, - { - "token": "logstash", - "start_offset": 64, - "end_offset": 72, - "type": "word", - "position": 17 - }, - { - "token": "开发", - "start_offset": 74, - "end_offset": 76, - "type": "word", - "position": 20 - }, - { - "token": "公司", - "start_offset": 76, - "end_offset": 78, - "type": "word", - "position": 21 - }, - { - "token": "从", - "start_offset": 79, - "end_offset": 80, - "type": "word", - "position": 23 - }, - { - "token": "股票", - "start_offset": 80, - "end_offset": 82, - "type": "word", - "position": 24 - }, - { - "token": "行情", - "start_offset": 82, - "end_offset": 84, - "type": "word", - "position": 25 - }, - { - "token": "到", - "start_offset": 84, - "end_offset": 85, - "type": "word", - "position": 26 - }, - { - "token": "twitter", - "start_offset": 86, - "end_offset": 93, - "type": "word", - "position": 27 - }, - { - "token": "消息", - "start_offset": 94, - "end_offset": 96, - "type": "word", - "position": 28 - }, - { - "token": "流", - "start_offset": 96, - "end_offset": 97, - "type": "word", - "position": 29 - }, - { - "token": "从", - "start_offset": 98, - "end_offset": 99, - "type": "word", - "position": 31 - }, - { - "token": "apach", - "start_offset": 100, - "end_offset": 106, - "type": "word", - "position": 32 - }, - { - "token": "日志", - "start_offset": 107, - "end_offset": 109, - "type": "word", - "position": 33 - }, - { - "token": "到", - "start_offset": 109, - "end_offset": 110, - "type": "word", - "position": 34 - }, - { - "token": 
"wordpress", - "start_offset": 111, - "end_offset": 120, - "type": "word", - "position": 35 - }, - { - "token": "博", - "start_offset": 121, - "end_offset": 122, - "type": "word", - "position": 36 - }, - { - "token": "文", - "start_offset": 122, - "end_offset": 123, - "type": "word", - "position": 37 - }, - { - "token": "我们", - "start_offset": 124, - "end_offset": 126, - "type": "word", - "position": 39 - }, - { - "token": "可以", - "start_offset": 126, - "end_offset": 128, - "type": "word", - "position": 40 - }, - { - "token": "帮助", - "start_offset": 128, - "end_offset": 130, - "type": "word", - "position": 41 - }, - { - "token": "人们", - "start_offset": 130, - "end_offset": 132, - "type": "word", - "position": 42 - }, - { - "token": "体验", - "start_offset": 132, - "end_offset": 134, - "type": "word", - "position": 43 - }, - { - "token": "搜索", - "start_offset": 134, - "end_offset": 136, - "type": "word", - "position": 44 - }, - { - "token": "强大", - "start_offset": 137, - "end_offset": 139, - "type": "word", - "position": 46 - }, - { - "token": "力量", - "start_offset": 139, - "end_offset": 141, - "type": "word", - "position": 47 - }, - { - "token": "帮助", - "start_offset": 142, - "end_offset": 144, - "type": "word", - "position": 49 - }, - { - "token": "他们", - "start_offset": 144, - "end_offset": 146, - "type": "word", - "position": 50 - }, - { - "token": "以", - "start_offset": 146, - "end_offset": 147, - "type": "word", - "position": 51 - }, - { - "token": "截然不同", - "start_offset": 147, - "end_offset": 151, - "type": "word", - "position": 52 - }, - { - "token": "方式", - "start_offset": 152, - "end_offset": 154, - "type": "word", - "position": 54 - }, - { - "token": "探索", - "start_offset": 154, - "end_offset": 156, - "type": "word", - "position": 55 - }, - { - "token": "和", - "start_offset": 156, - "end_offset": 157, - "type": "word", - "position": 56 - }, - { - "token": "分析", - "start_offset": 157, - "end_offset": 159, - "type": "word", - "position": 57 - }, - { - "token": "数据", - "start_offset": 159, - "end_offset": 161, - "type": "word", - "position": 58 - } - ] -} --------------------------------------------------- diff --git a/docs/plugins/analysis-stempel.asciidoc b/docs/plugins/analysis-stempel.asciidoc deleted file mode 100644 index 74a0b1a975f82..0000000000000 --- a/docs/plugins/analysis-stempel.asciidoc +++ /dev/null @@ -1,112 +0,0 @@ -[[analysis-stempel]] -=== Stempel Polish analysis plugin - -The Stempel analysis plugin integrates Lucene's Stempel analysis -module for Polish into elasticsearch. - -:plugin_name: analysis-stempel -include::install_remove.asciidoc[] - -[[analysis-stempel-tokenizer]] -[discrete] -==== `stempel` tokenizer and token filters - -The plugin provides the `polish` analyzer and the `polish_stem` and `polish_stop` token filters, -which are not configurable. 
- -==== Reimplementing and extending the analyzers - -The `polish` analyzer could be reimplemented as a `custom` analyzer that can -then be extended and configured differently as follows: - -[source,console] ----------------------------------------------------- -PUT /stempel_example -{ - "settings": { - "analysis": { - "analyzer": { - "rebuilt_stempel": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "polish_stop", - "polish_stem" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: stempel_example, first: polish, second: rebuilt_stempel}\nendyaml\n/] - -[[analysis-polish-stop]] -==== `polish_stop` token filter - -The `polish_stop` token filter filters out Polish stopwords (`_polish_`), and -any other custom stopwords specified by the user. This filter only supports -the predefined `_polish_` stopwords list. If you want to use a different -predefined list, then use the -{ref}/analysis-stop-tokenfilter.html[`stop` token filter] instead. - -[source,console] --------------------------------------------------- -PUT /polish_stop_example -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "analyzer_with_stop": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "polish_stop" - ] - } - }, - "filter": { - "polish_stop": { - "type": "polish_stop", - "stopwords": [ - "_polish_", - "jeść" - ] - } - } - } - } - } -} - -GET polish_stop_example/_analyze -{ - "analyzer": "analyzer_with_stop", - "text": "Gdzie kucharek sześć, tam nie ma co jeść." -} --------------------------------------------------- - -The above request returns: - -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "kucharek", - "start_offset" : 6, - "end_offset" : 14, - "type" : "", - "position" : 1 - }, - { - "token" : "sześć", - "start_offset" : 15, - "end_offset" : 20, - "type" : "", - "position" : 2 - } - ] -} --------------------------------------------------- diff --git a/docs/plugins/analysis-ukrainian.asciidoc b/docs/plugins/analysis-ukrainian.asciidoc deleted file mode 100644 index 8bd082637cd9e..0000000000000 --- a/docs/plugins/analysis-ukrainian.asciidoc +++ /dev/null @@ -1,15 +0,0 @@ -[[analysis-ukrainian]] -=== Ukrainian analysis plugin - -The Ukrainian analysis plugin integrates Lucene's UkrainianMorfologikAnalyzer into elasticsearch. - -It provides stemming for Ukrainian using the https://github.com/morfologik/morfologik-stemming[Morfologik project]. - -:plugin_name: analysis-ukrainian -include::install_remove.asciidoc[] - -[[analysis-ukrainian-analyzer]] -[discrete] -==== `ukrainian` analyzer - -The plugin provides the `ukrainian` analyzer. diff --git a/docs/plugins/analysis.asciidoc b/docs/plugins/analysis.asciidoc deleted file mode 100644 index 09dda9458adbd..0000000000000 --- a/docs/plugins/analysis.asciidoc +++ /dev/null @@ -1,69 +0,0 @@ -[[analysis]] -== Analysis plugins - -Analysis plugins extend Elasticsearch by adding new analyzers, tokenizers, -token filters, or character filters to Elasticsearch. - -[discrete] -==== Core analysis plugins - -The core analysis plugins are: - -<>:: - -Adds extended Unicode support using the http://site.icu-project.org/[ICU] -libraries, including better analysis of Asian languages, Unicode -normalization, Unicode-aware case folding, collation support, and -transliteration. - -<>:: - -Advanced analysis of Japanese using the https://www.atilika.org/[Kuromoji analyzer]. 
- -<>:: - -Morphological analysis of Korean using the Lucene Nori analyzer. - -<>:: - -Analyzes tokens into their phonetic equivalent using Soundex, Metaphone, -Caverphone, and other codecs. - -<>:: - -An analyzer for Chinese or mixed Chinese-English text. This analyzer uses -probabilistic knowledge to find the optimal word segmentation for Simplified -Chinese text. The text is first broken into sentences, then each sentence is -segmented into words. - -<>:: - -Provides high quality stemming for Polish. - -<>:: - -Provides stemming for Ukrainian. - -[discrete] -==== Community contributed analysis plugins - -A number of analysis plugins have been contributed by our community: - -* https://github.com/medcl/elasticsearch-analysis-ik[IK Analysis Plugin] (by Medcl) -* https://github.com/medcl/elasticsearch-analysis-pinyin[Pinyin Analysis Plugin] (by Medcl) -* https://github.com/duydo/elasticsearch-analysis-vietnamese[Vietnamese Analysis Plugin] (by Duy Do) -* https://github.com/medcl/elasticsearch-analysis-stconvert[STConvert Analysis Plugin] (by Medcl) - -include::analysis-icu.asciidoc[] - -include::analysis-kuromoji.asciidoc[] - -include::analysis-nori.asciidoc[] - -include::analysis-phonetic.asciidoc[] - -include::analysis-smartcn.asciidoc[] - -include::analysis-stempel.asciidoc[] - -include::analysis-ukrainian.asciidoc[] diff --git a/docs/plugins/api.asciidoc b/docs/plugins/api.asciidoc deleted file mode 100644 index 8ab4991bedd5e..0000000000000 --- a/docs/plugins/api.asciidoc +++ /dev/null @@ -1,21 +0,0 @@ -[[api]] -== API extension plugins - -API extension plugins add new functionality to Elasticsearch by adding new APIs or features, usually to do with search or mapping. - -[discrete] -=== Community contributed API extension plugins - -A number of plugins have been contributed by our community: - -* https://github.com/carrot2/elasticsearch-carrot2[carrot2 Plugin]: - Results clustering with https://github.com/carrot2/carrot2[carrot2] (by Dawid Weiss) - -* https://github.com/wikimedia/search-extra[Elasticsearch Trigram Accelerated Regular Expression Filter]: - (by Wikimedia Foundation/Nik Everett) - -* https://github.com/wikimedia/search-highlighter[Elasticsearch Experimental Highlighter]: - (by Wikimedia Foundation/Nik Everett) - -* https://github.com/zentity-io/zentity[Entity Resolution Plugin] (https://zentity.io[zentity]): - Real-time entity resolution with pure Elasticsearch (by Dave Moore) diff --git a/docs/plugins/authors.asciidoc b/docs/plugins/authors.asciidoc deleted file mode 100644 index 08bf3ea3994a2..0000000000000 --- a/docs/plugins/authors.asciidoc +++ /dev/null @@ -1,27 +0,0 @@ -[[plugin-authors]] -== Creating an {es} plugin - -{es} plugins are modular bits of code that add functionality to -{es}. Plugins are written in Java and implement Java interfaces that -are defined in the source code. Plugins are composed of JAR files and metadata -files, compressed in a single zip file. - -There are two ways to create a plugin: - -<>:: -Text analysis plugins can be developed against the stable plugin API to provide -{es} with custom Lucene analyzers, token filters, character filters, and -tokenizers. - -<>:: -Other plugins can be developed against the classic plugin API to provide custom -authentication, authorization, or scoring mechanisms, and more. 
- -:plugin-type: stable -include::development/creating-stable-plugins.asciidoc[] -include::development/example-text-analysis-plugin.asciidoc[] -:!plugin-type: - -:plugin-type: classic -include::development/creating-classic-plugins.asciidoc[] -:!plugin-type: \ No newline at end of file diff --git a/docs/plugins/development/creating-classic-plugins.asciidoc b/docs/plugins/development/creating-classic-plugins.asciidoc deleted file mode 100644 index 58dc00e496c2d..0000000000000 --- a/docs/plugins/development/creating-classic-plugins.asciidoc +++ /dev/null @@ -1,92 +0,0 @@ -[[creating-classic-plugins]] -=== Creating classic plugins - -Classic plugins provide {es} with mechanisms for custom authentication, -authorization, scoring, and more. - -[IMPORTANT] -.Plugin release lifecycle -============================================== - -Classic plugins require you to build a new version for each new {es} release. -This version is checked when the plugin is installed and when it is loaded. {es} -will refuse to start in the presence of plugins with the incorrect -`elasticsearch.version`. - -============================================== - -[discrete] -==== Classic plugin file structure - -Classic plugins are ZIP files composed of JAR files and -<>, a Java properties file that describes the -plugin. - -Note that only JAR files at the root of the plugin are added to the classpath -for the plugin. If you need other resources, package them into a resources JAR. - -[discrete] -==== Example plugins - -The {es} repository contains {es-repo}tree/main/plugins/examples[examples of plugins]. Some of these include: - -* a plugin with {es-repo}tree/main/plugins/examples/custom-settings[custom settings] -* a plugin with a {es-repo}tree/main/plugins/examples/custom-processor[custom ingest processor] -* adding {es-repo}tree/main/plugins/examples/rest-handler[custom rest endpoints] -* adding a {es-repo}tree/main/plugins/examples/rescore[custom rescorer] -* a script {es-repo}tree/main/plugins/examples/script-expert-scoring[implemented in Java] - -These examples provide the bare bones needed to get started. For more -information about how to write a plugin, we recommend looking at the -{es-repo}tree/main/plugins/[source code of existing plugins] for inspiration. - -[discrete] -==== Testing your plugin - -Use `bin/elasticsearch-plugin install file:///path/to/your/plugin` -to install your plugin for testing. The Java plugin is auto-loaded only if it's in the -`plugins/` directory. - -[discrete] -[[plugin-authors-jsm]] -==== Java Security permissions - -Some plugins may need additional security permissions. A plugin can include -the optional `plugin-security.policy` file containing `grant` statements for -additional permissions. Any additional permissions will be displayed to the user -with a large warning, and they will have to confirm them when installing the -plugin interactively. So if possible, it is best to avoid requesting any -spurious permissions! - -If you are using the {es} Gradle build system, place this file in -`src/main/plugin-metadata` and it will be applied during unit tests as well. - -The Java security model is stack-based, and additional -permissions are granted to the jars in your plugin, so you have to -write proper security code around operations requiring elevated privileges. -You might add a check to prevent unprivileged code (such as scripts) -from gaining escalated permissions. 
For example: - -[source,java] --------------------------------------------------- -// ES permission you should check before doPrivileged() blocks -import org.elasticsearch.SpecialPermission; - -SecurityManager sm = System.getSecurityManager(); -if (sm != null) { - // unprivileged code such as scripts do not have SpecialPermission - sm.checkPermission(new SpecialPermission()); -} -AccessController.doPrivileged( - // sensitive operation -); --------------------------------------------------- - -Check https://www.oracle.com/technetwork/java/seccodeguide-139067.html[Secure Coding Guidelines for Java SE] -for more information. - -[[plugin-descriptor-file-classic]] -==== The plugin descriptor file for classic plugins - -include::plugin-descriptor-file.asciidoc[] diff --git a/docs/plugins/development/creating-stable-plugins.asciidoc b/docs/plugins/development/creating-stable-plugins.asciidoc deleted file mode 100644 index 9f98774b5a761..0000000000000 --- a/docs/plugins/development/creating-stable-plugins.asciidoc +++ /dev/null @@ -1,131 +0,0 @@ -[[creating-stable-plugins]] -=== Creating text analysis plugins with the stable plugin API - -Text analysis plugins provide {es} with custom {ref}/analysis.html[Lucene -analyzers, token filters, character filters, and tokenizers]. - -[discrete] -==== The stable plugin API - -Text analysis plugins can be developed against the stable plugin API. This API -consists of the following dependencies: - -* `plugin-api` - an API used by plugin developers to implement custom {es} -plugins. -* `plugin-analysis-api` - an API used by plugin developers to implement analysis -plugins and integrate them into {es}. -* `lucene-analysis-common` - a dependency of `plugin-analysis-api` that contains -core Lucene analysis interfaces like `Tokenizer`, `Analyzer`, and `TokenStream`. - -For new versions of {es} within the same major version, plugins built against -this API does not need to be recompiled. Future versions of the API will be -backwards compatible and plugins are binary compatible with future versions of -{es}. In other words, once you have a working artifact, you can re-use it when -you upgrade {es} to a new bugfix or minor version. - -A text analysis plugin can implement four factory classes that are provided by -the analysis plugin API. - -* `AnalyzerFactory` to create a Lucene analyzer -* `CharFilterFactory` to create a character character filter -* `TokenFilterFactory` to create a Lucene token filter -* `TokenizerFactory` to create a Lucene tokenizer - -The key to implementing a stable plugin is the `@NamedComponent` annotation. -Many of {es}'s components have names that are used in configurations. For -example, the keyword analyzer is referenced in configuration with the name -`"keyword"`. Once your custom plugin is installed in your cluster, your named -components may be referenced by name in these configurations as well. - -You can also create text analysis plugins as a <>. However, classic plugins are pinned to a specific version of -{es}. You need to recompile them when upgrading {es}. Because classic plugins -are built against internal APIs that can change, upgrading to a new version may -require code changes. - -[discrete] -==== Stable plugin file structure - -Stable plugins are ZIP files composed of JAR files and two metadata files: - -* `stable-plugin-descriptor.properties` - a Java properties file that describes -the plugin. Refer to <>. 
-* `named_components.json` - a JSON file mapping interfaces to key-value pairs -of component names and implementation classes. - -Note that only JAR files at the root of the plugin are added to the classpath -for the plugin. If you need other resources, package them into a resources JAR. - -[discrete] -==== Development process - -Elastic provides a Gradle plugin, `elasticsearch.stable-esplugin`, that makes it -easier to develop and package stable plugins. The steps in this section assume -you use this plugin. However, you don't need Gradle to create plugins. - -The {es} Github repository contains -{es-repo}tree/main/plugins/examples/stable-analysis[an example analysis plugin]. -The example `build.gradle` build script provides a good starting point for -developing your own plugin. - -[discrete] -===== Prerequisites - -Plugins are written in Java, so you need to install a Java Development Kit -(JDK). Install Gradle if you want to use Gradle. - -[discrete] -===== Step by step - -. Create a directory for your project. -. Copy the example `build.gradle` build script to your project directory. Note -that this build script uses the `elasticsearch.stable-esplugin` gradle plugin to -build your plugin. -. Edit the `build.gradle` build script: -** Add a definition for the `pluginApiVersion` and matching `luceneVersion` -variables to the top of the file. You can find these versions in the -`build-tools-internal/version.properties` file in the {es-repo}[Elasticsearch -Github repository]. -** Edit the `name` and `description` in the `esplugin` section of the build -script. This will create the plugin descriptor file. If you're not using the -`elasticsearch.stable-esplugin` gradle plugin, refer to -<> to create the file manually. -** Add module information. -** Ensure you have declared the following compile-time dependencies. These -dependencies are compile-time only because {es} will provide these libraries at -runtime. -*** `org.elasticsearch.plugin:elasticsearch-plugin-api` -*** `org.elasticsearch.plugin:elasticsearch-plugin-analysis-api` -*** `org.apache.lucene:lucene-analysis-common` -** For unit testing, ensure these dependencies have also been added to the -`build.gradle` script as `testImplementation` dependencies. -. Implement an interface from the analysis plugin API, annotating it with -`NamedComponent`. Refer to <> for an example. -. You should now be able to assemble a plugin ZIP file by running: -+ -[source,sh] ----- -gradle bundlePlugin ----- -The resulting plugin ZIP file is written to the `build/distributions` -directory. - -[discrete] -===== YAML REST tests - -The Gradle `elasticsearch.yaml-rest-test` plugin enables testing of your -plugin using the {es-repo}blob/main/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc[{es} yamlRestTest framework]. -These tests use a YAML-formatted domain language to issue REST requests against -an internal {es} cluster that has your plugin installed, and to check the -results of those requests. The structure of a YAML REST test directory is as -follows: - -* A test suite class, defined under `src/yamlRestTest/java`. This class should -extend `ESClientYamlSuiteTestCase`. -* The YAML tests themselves should be defined under -`src/yamlRestTest/resources/test/`. 
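-
-As a minimal sketch, a test file under `src/yamlRestTest/resources/test/`
-might look like the following. The `my_token_filter` name is a placeholder for
-whatever `@NamedComponent` name your plugin registers, and the assertion
-should be adjusted to the output your component actually produces.
-
-[source,yaml]
-----
-## A sketch only; adapt the filter name and assertions to your plugin.
----
-"Named component is available to the analyze API":
-  - do:
-      indices.analyze:
-        body:
-          text: some sample text
-          tokenizer: standard
-          filter:
-            - type: "my_token_filter"
-  # Replace with match/length assertions on the tokens you expect.
-  - is_true: tokens
-----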
- -[[plugin-descriptor-file-stable]] -==== The plugin descriptor file for stable plugins - -include::plugin-descriptor-file.asciidoc[] diff --git a/docs/plugins/development/example-text-analysis-plugin.asciidoc b/docs/plugins/development/example-text-analysis-plugin.asciidoc deleted file mode 100644 index 834ed196d6498..0000000000000 --- a/docs/plugins/development/example-text-analysis-plugin.asciidoc +++ /dev/null @@ -1,213 +0,0 @@ -[[example-text-analysis-plugin]] -==== Example text analysis plugin - -This example shows how to create a simple "Hello world" text analysis plugin -using the stable plugin API. The plugin provides a custom Lucene token filter -that strips all tokens except for "hello" and "world". - -Elastic provides a Grade plugin, `elasticsearch.stable-esplugin`, that makes it -easier to develop and package stable plugins. The steps in this guide assume you -use this plugin. However, you don't need Gradle to create plugins. - -. Create a new directory for your project. -. In this example, the source code is organized under the `main` and -`test` directories. In your project's home directory, create `src/` `src/main/`, -and `src/test/` directories. -. Create the following `build.gradle` build script in your project's home -directory: -+ -[source,gradle] ----- -ext.pluginApiVersion = '8.7.0' -ext.luceneVersion = '9.5.0' - -buildscript { - ext.pluginApiVersion = '8.7.0' - repositories { - mavenCentral() - } - dependencies { - classpath "org.elasticsearch.gradle:build-tools:${pluginApiVersion}" - } -} - -apply plugin: 'elasticsearch.stable-esplugin' -apply plugin: 'elasticsearch.yaml-rest-test' - -esplugin { - name 'my-plugin' - description 'My analysis plugin' -} - -group 'org.example' -version '1.0-SNAPSHOT' - -repositories { - mavenLocal() - mavenCentral() -} - -dependencies { - - //TODO transitive dependency off and plugin-api dependency? - compileOnly "org.elasticsearch.plugin:elasticsearch-plugin-api:${pluginApiVersion}" - compileOnly "org.elasticsearch.plugin:elasticsearch-plugin-analysis-api:${pluginApiVersion}" - compileOnly "org.apache.lucene:lucene-analysis-common:${luceneVersion}" - - //TODO for testing this also have to be declared - testImplementation "org.elasticsearch.plugin:elasticsearch-plugin-api:${pluginApiVersion}" - testImplementation "org.elasticsearch.plugin:elasticsearch-plugin-analysis-api:${pluginApiVersion}" - testImplementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}" - - testImplementation ('junit:junit:4.13.2'){ - exclude group: 'org.hamcrest' - } - testImplementation 'org.mockito:mockito-core:4.4.0' - testImplementation 'org.hamcrest:hamcrest:2.2' - -} ----- -. In `src/main/java/org/example/`, create `HelloWorldTokenFilter.java`. 
This -file provides the code for a token filter that strips all tokens except for -"hello" and "world": -+ -[source,java] ----- -package org.example; - -import org.apache.lucene.analysis.FilteringTokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; - -import java.util.Arrays; - -public class HelloWorldTokenFilter extends FilteringTokenFilter { - private final CharTermAttribute term = addAttribute(CharTermAttribute.class); - - public HelloWorldTokenFilter(TokenStream input) { - super(input); - } - - @Override - public boolean accept() { - if (term.length() != 5) return false; - return Arrays.equals(term.buffer(), 0, 4, "hello".toCharArray(), 0, 4) - || Arrays.equals(term.buffer(), 0, 4, "world".toCharArray(), 0, 4); - } -} ----- -. This filter can be provided to Elasticsearch using the following -`HelloWorldTokenFilterFactory.java` factory class. The `@NamedComponent` -annotation is used to give the filter the `hello_world` name. This is the name -you can use to refer to the filter, once the plugin has been deployed. -+ -[source,java] ----- -package org.example; - -import org.apache.lucene.analysis.TokenStream; -import org.elasticsearch.plugin.analysis.TokenFilterFactory; -import org.elasticsearch.plugin.NamedComponent; - -@NamedComponent(value = "hello_world") -public class HelloWorldTokenFilterFactory implements TokenFilterFactory { - - @Override - public TokenStream create(TokenStream tokenStream) { - return new HelloWorldTokenFilter(tokenStream); - } - -} ----- -. Unit tests may go under the `src/test` directory. You will have to add -dependencies for your preferred testing framework. - -. Run: -+ -[source,sh] ----- -gradle bundlePlugin ----- -This builds the JAR file, generates the metadata files, and bundles them into a -plugin ZIP file. The resulting ZIP file will be written to the -`build/distributions` directory. -. <>. -. You can use the `_analyze` API to verify that the `hello_world` token filter -works as expected: -+ -[source,console] ----- -GET /_analyze -{ - "text": "hello to everyone except the world", - "tokenizer": "standard", - "filter": ["hello_world"] -} ----- -// TEST[skip:would require this plugin to be installed] - -[discrete] -=== YAML REST tests - -If you are using the `elasticsearch.stable-esplugin` plugin for Gradle, you can -use {es}'s YAML Rest Test framework. This framework allows you to load your -plugin in a running test cluster and issue real REST API queries against it. The -full syntax for this framework is beyond the scope of this tutorial, but there -are many examples in the Elasticsearch repository. Refer to the -{es-repo}tree/main/plugins/examples/stable-analysis[example analysis plugin] in -the {es} Github repository for an example. - -. Create a `yamlRestTest` directory in the `src` directory. -. Under the `yamlRestTest` directory, create a `java` folder for Java sources -and a `resources` folder. -. In `src/yamlRestTest/java/org/example/`, create -`HelloWorldPluginClientYamlTestSuiteIT.java`. This class implements -`ESClientYamlSuiteTestCase`. 
-+ -[source,java] ----- -import com.carrotsearch.randomizedtesting.annotations.Name; -import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; -import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; -import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; - -public class HelloWorldPluginClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { - - public HelloWorldPluginClientYamlTestSuiteIT( - @Name("yaml") ClientYamlTestCandidate testCandidate - ) { - super(testCandidate); - } - - @ParametersFactory - public static Iterable parameters() throws Exception { - return ESClientYamlSuiteTestCase.createParameters(); - } -} ----- -. In `src/yamlRestTest/resources/rest-api-spec/test/plugin`, create the -`10_token_filter.yml` YAML file: -+ -[source,yaml] ----- -## Sample rest test ---- -"Hello world plugin test - removes all tokens except hello and world": - - do: - indices.analyze: - body: - text: hello to everyone except the world - tokenizer: standard - filter: - - type: "hello_world" - - length: { tokens: 2 } - - match: { tokens.0.token: "hello" } - - match: { tokens.1.token: "world" } - ----- -. Run the test with: -+ -[source,sh] ----- -gradle yamlRestTest ----- \ No newline at end of file diff --git a/docs/plugins/development/plugin-descriptor-file.asciidoc b/docs/plugins/development/plugin-descriptor-file.asciidoc deleted file mode 100644 index 7ed5979a93c04..0000000000000 --- a/docs/plugins/development/plugin-descriptor-file.asciidoc +++ /dev/null @@ -1,58 +0,0 @@ -ifeval::["{plugin-type}" == "stable"] -The stable plugin descriptor file is a Java properties file called -`stable-plugin-descriptor.properties` -endif::[] -ifeval::["{plugin-type}" == "classic"] -The classic plugin descriptor file is a Java properties file called -`plugin-descriptor.properties` -endif::[] -that describes the plugin. The file is automatically created if you are -using {es}'s Gradle build system. If you're not using the gradle plugin, you -can create it manually using the following template. - -[source,properties] -:plugin-properties-files: {elasticsearch-root}/build-tools/src/main/resources -[source,yaml] ----- -include::{plugin-properties-files}/plugin-descriptor.properties[] ----- - -[discrete] -==== Properties - - -[cols="<,<,<",options="header",] -|======================================================================= -|Element | Type | Description - -|`description` |String | simple summary of the plugin - -|`version` |String | plugin's version - -|`name` |String | the plugin name - -ifeval::["{plugin-type}" == "stable"] -|`classname` |String | this property is for classic plugins. Do -not include this property for stable plugins. -endif::[] - -ifeval::["{plugin-type}" == "classic"] -|`classname` |String | the name of the class to load, -fully-qualified. - -|`extended.plugins` |String | other plugins this plugin extends through -SPI. - -|`modulename` |String | the name of the module to load classname -from. Only applies to "isolated" plugins. This is optional. Specifying it causes -the plugin to be loaded as a module. -endif::[] - -|`java.version` |String | version of java the code is built against. -Use the system property `java.specification.version`. Version string must be a -sequence of nonnegative decimal integers separated by "."'s and may have leading -zeros. - -|`elasticsearch.version` |String | version of {es} compiled against. 
- -|======================================================================= diff --git a/docs/plugins/discovery-azure-classic.asciidoc b/docs/plugins/discovery-azure-classic.asciidoc deleted file mode 100644 index b8d37f024172c..0000000000000 --- a/docs/plugins/discovery-azure-classic.asciidoc +++ /dev/null @@ -1,443 +0,0 @@ -[[discovery-azure-classic]] -=== Azure Classic discovery plugin - -The Azure Classic Discovery plugin uses the Azure Classic API to identify the -addresses of seed hosts. - -deprecated[5.0.0, This plugin will be removed in the future] - -:plugin_name: discovery-azure-classic -include::install_remove.asciidoc[] - - -[[discovery-azure-classic-usage]] -==== Azure Virtual Machine discovery - -Azure VM discovery allows to use the Azure APIs to perform automatic discovery. -Here is a simple sample configuration: - -[source,yaml] ----- -cloud: - azure: - management: - subscription.id: XXX-XXX-XXX-XXX - cloud.service.name: es-demo-app - keystore: - path: /path/to/azurekeystore.pkcs12 - password: WHATEVER - type: pkcs12 - -discovery: - seed_providers: azure ----- - -[IMPORTANT] -.Binding the network host -============================================== - -The keystore file must be placed in a directory accessible by Elasticsearch like the `config` directory. - -It's important to define `network.host` as by default it's bound to `localhost`. - -You can use {ref}/modules-network.html[core network host settings]. For example `_en0_`. - -============================================== - -[[discovery-azure-classic-short]] -===== How to start (short story) - -* Create Azure instances -* Install Elasticsearch -* Install Azure plugin -* Modify `elasticsearch.yml` file -* Start Elasticsearch - -[[discovery-azure-classic-settings]] -===== Azure credential API settings - -The following are a list of settings that can further control the credential API: - -[horizontal] -`cloud.azure.management.keystore.path`:: - - /path/to/keystore - -`cloud.azure.management.keystore.type`:: - - `pkcs12`, `jceks` or `jks`. Defaults to `pkcs12`. - -`cloud.azure.management.keystore.password`:: - - your_password for the keystore - -`cloud.azure.management.subscription.id`:: - - your_azure_subscription_id - -`cloud.azure.management.cloud.service.name`:: - - your_azure_cloud_service_name. This is the cloud service name/DNS but without the `cloudapp.net` part. - So if the DNS name is `abc.cloudapp.net` then the `cloud.service.name` to use is just `abc`. - - -[[discovery-azure-classic-settings-advanced]] -===== Advanced settings - -The following are a list of settings that can further control the discovery: - -`discovery.azure.host.type`:: - - Either `public_ip` or `private_ip` (default). Azure discovery will use the - one you set to ping other nodes. - -`discovery.azure.endpoint.name`:: - - When using `public_ip` this setting is used to identify the endpoint name - used to forward requests to Elasticsearch (aka transport port name). - Defaults to `elasticsearch`. In Azure management console, you could define - an endpoint `elasticsearch` forwarding for example requests on public IP - on port 8100 to the virtual machine on port 9300. - -`discovery.azure.deployment.name`:: - - Deployment name if any. Defaults to the value set with - `cloud.azure.management.cloud.service.name`. - -`discovery.azure.deployment.slot`:: - - Either `staging` or `production` (default). 
- -For example: - -[source,yaml] ----- -discovery: - type: azure - azure: - host: - type: private_ip - endpoint: - name: elasticsearch - deployment: - name: your_azure_cloud_service_name - slot: production ----- - -[[discovery-azure-classic-long]] -==== Setup process for Azure Discovery - -We will expose here one strategy which is to hide our Elasticsearch cluster from outside. - -With this strategy, only VMs behind the same virtual port can talk to each -other. That means that with this mode, you can use Elasticsearch unicast -discovery to build a cluster, using the Azure API to retrieve information -about your nodes. - -[[discovery-azure-classic-long-prerequisites]] -===== Prerequisites - -Before starting, you need to have: - -* A https://azure.microsoft.com/en-us/[Windows Azure account] -* OpenSSL that isn't from MacPorts, specifically `OpenSSL 1.0.1f 6 Jan - 2014` doesn't seem to create a valid keypair for ssh. FWIW, - `OpenSSL 1.0.1c 10 May 2012` on Ubuntu 14.04 LTS is known to work. -* SSH keys and certificate -+ --- - -You should follow http://azure.microsoft.com/en-us/documentation/articles/linux-use-ssh-key/[this guide] to learn -how to create or use existing SSH keys. If you have already done it, you can skip the following. - -Here is a description on how to generate SSH keys using `openssl`: - -[source,sh] ----- -# You may want to use another dir than /tmp -cd /tmp -openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout azure-private.key -out azure-certificate.pem -chmod 600 azure-private.key azure-certificate.pem -openssl x509 -outform der -in azure-certificate.pem -out azure-certificate.cer ----- - -Generate a keystore which will be used by the plugin to authenticate with a certificate -all Azure API calls. - -[source,sh] ----- -# Generate a keystore (azurekeystore.pkcs12) -# Transform private key to PEM format -openssl pkcs8 -topk8 -nocrypt -in azure-private.key -inform PEM -out azure-pk.pem -outform PEM -# Transform certificate to PEM format -openssl x509 -inform der -in azure-certificate.cer -out azure-cert.pem -cat azure-cert.pem azure-pk.pem > azure.pem.txt -# You MUST enter a password! -openssl pkcs12 -export -in azure.pem.txt -out azurekeystore.pkcs12 -name azure -noiter -nomaciter ----- - -Upload the `azure-certificate.cer` file both in the Elasticsearch Cloud Service (under `Manage Certificates`), -and under `Settings -> Manage Certificates`. - -IMPORTANT: When prompted for a password, you need to enter a non empty one. - -See this http://www.windowsazure.com/en-us/manage/linux/how-to-guides/ssh-into-linux/[guide] for -more details about how to create keys for Azure. - -Once done, you need to upload your certificate in Azure: - -* Go to the https://account.windowsazure.com/[management console]. -* Sign in using your account. -* Click on `Portal`. -* Go to Settings (bottom of the left list) -* On the bottom bar, click on `Upload` and upload your `azure-certificate.cer` file. 
- -You may want to use -http://www.windowsazure.com/en-us/develop/nodejs/how-to-guides/command-line-tools/[Windows Azure Command-Line Tool]: - --- - -* Install https://github.com/joyent/node/wiki/Installing-Node.js-via-package-manager[NodeJS], for example using -homebrew on MacOS X: -+ -[source,sh] ----- -brew install node ----- - -* Install Azure tools -+ -[source,sh] ----- -sudo npm install azure-cli -g ----- - -* Download and import your azure settings: -+ -[source,sh] ----- -# This will open a browser and will download a .publishsettings file -azure account download - -# Import this file (we have downloaded it to /tmp) -# Note, it will create needed files in ~/.azure. You can remove azure.publishsettings when done. -azure account import /tmp/azure.publishsettings ----- - -[[discovery-azure-classic-long-instance]] -===== Creating your first instance - -You need to have a storage account available. Check http://www.windowsazure.com/en-us/develop/net/how-to-guides/blob-storage/#create-account[Azure Blob Storage documentation] -for more information. - -You will need to choose the operating system you want to run on. To get a list of official available images, run: - -[source,sh] ----- -azure vm image list ----- - -Let's say we are going to deploy an Ubuntu image on an extra small instance in West Europe: - -[horizontal] -Azure cluster name:: - - `azure-elasticsearch-cluster` - -Image:: - - `b39f27a8b8c64d52b05eac6a62ebad85__Ubuntu-13_10-amd64-server-20130808-alpha3-en-us-30GB` - -VM Name:: - - `myesnode1` - -VM Size:: - - `extrasmall` - -Location:: - - `West Europe` - -Login:: - - `elasticsearch` - -Password:: - - `password1234!!` - - -Using command line: - -[source,sh] ----- -azure vm create azure-elasticsearch-cluster \ - b39f27a8b8c64d52b05eac6a62ebad85__Ubuntu-13_10-amd64-server-20130808-alpha3-en-us-30GB \ - --vm-name myesnode1 \ - --location "West Europe" \ - --vm-size extrasmall \ - --ssh 22 \ - --ssh-cert /tmp/azure-certificate.pem \ - elasticsearch password1234\!\! ----- - -You should see something like: - -[source,text] ----- -info: Executing command vm create -+ Looking up image -+ Looking up cloud service -+ Creating cloud service -+ Retrieving storage accounts -+ Configuring certificate -+ Creating VM -info: vm create command OK ----- - -Now, your first instance is started. - -[TIP] -.Working with SSH -=============================================== - -You need to give the private key and username each time you log on your instance: - -[source,sh] ----- -ssh -i ~/.ssh/azure-private.key elasticsearch@myescluster.cloudapp.net ----- - -But you can also define it once in `~/.ssh/config` file: - -[source,text] ----- -Host *.cloudapp.net - User elasticsearch - StrictHostKeyChecking no - UserKnownHostsFile=/dev/null - IdentityFile ~/.ssh/azure-private.key ----- -=============================================== - -Next, you need to install Elasticsearch on your new instance. First, copy your -keystore to the instance, then connect to the instance using SSH: - -[source,sh] ----- -scp /tmp/azurekeystore.pkcs12 azure-elasticsearch-cluster.cloudapp.net:/home/elasticsearch -ssh azure-elasticsearch-cluster.cloudapp.net ----- - -Once connected, {ref}/install-elasticsearch.html[install {es}]. 
- -[[discovery-azure-classic-long-plugin]] -===== Install Elasticsearch cloud azure plugin - -[source,sh] ----- -# Install the plugin -sudo /usr/share/elasticsearch/bin/elasticsearch-plugin install discovery-azure-classic - -# Configure it -sudo vi /etc/elasticsearch/elasticsearch.yml ----- - -And add the following lines: - -[source,yaml] ----- -# If you don't remember your account id, you may get it with `azure account list` -cloud: - azure: - management: - subscription.id: your_azure_subscription_id - cloud.service.name: your_azure_cloud_service_name - keystore: - path: /home/elasticsearch/azurekeystore.pkcs12 - password: your_password_for_keystore - -discovery: - type: azure - -# Recommended (warning: non durable disk) -# path.data: /mnt/resource/elasticsearch/data ----- - -Start Elasticsearch: - -[source,sh] ----- -sudo systemctl start elasticsearch ----- - -If anything goes wrong, check your logs in `/var/log/elasticsearch`. - -[[discovery-azure-classic-scale]] -==== Scaling out! - -You need first to create an image of your previous machine. -Disconnect from your machine and run locally the following commands: - -[source,sh] ----- -# Shutdown the instance -azure vm shutdown myesnode1 - -# Create an image from this instance (it could take some minutes) -azure vm capture myesnode1 esnode-image --delete - -# Note that the previous instance has been deleted (mandatory) -# So you need to create it again and BTW create other instances. - -azure vm create azure-elasticsearch-cluster \ - esnode-image \ - --vm-name myesnode1 \ - --location "West Europe" \ - --vm-size extrasmall \ - --ssh 22 \ - --ssh-cert /tmp/azure-certificate.pem \ - elasticsearch password1234\!\! ----- - - -[TIP] -========================================= -It could happen that azure changes the endpoint public IP address. -DNS propagation could take some minutes before you can connect again using -name. You can get from azure the IP address if needed, using: - -[source,sh] ----- -# Look at Network `Endpoints 0 Vip` -azure vm show myesnode1 ----- - -========================================= - -Let's start more instances! - -[source,sh] ----- -for x in $(seq 2 10) - do - echo "Launching azure instance #$x..." - azure vm create azure-elasticsearch-cluster \ - esnode-image \ - --vm-name myesnode$x \ - --vm-size extrasmall \ - --ssh $((21 + $x)) \ - --ssh-cert /tmp/azure-certificate.pem \ - --connect \ - elasticsearch password1234\!\! - done ----- - -If you want to remove your running instances: - -[source,sh] ----- -azure vm delete myesnode1 ----- diff --git a/docs/plugins/discovery-ec2.asciidoc b/docs/plugins/discovery-ec2.asciidoc deleted file mode 100644 index 164e3398d7a4f..0000000000000 --- a/docs/plugins/discovery-ec2.asciidoc +++ /dev/null @@ -1,373 +0,0 @@ -[[discovery-ec2]] -=== EC2 Discovery plugin - -The EC2 discovery plugin provides a list of seed addresses to the -{ref}/discovery-hosts-providers.html[discovery process] by querying the -https://github.com/aws/aws-sdk-java[AWS API] for a list of EC2 instances -matching certain criteria determined by the <>. 
- -*If you are looking for a hosted solution of {es} on AWS, please visit -https://www.elastic.co/cloud.* - -:plugin_name: discovery-ec2 -include::install_remove.asciidoc[] - -[[discovery-ec2-usage]] -==== Using the EC2 discovery plugin - -The `discovery-ec2` plugin allows {es} to find the master-eligible nodes in a -cluster running on AWS EC2 by querying the -https://github.com/aws/aws-sdk-java[AWS API] for the addresses of the EC2 -instances running these nodes. - -It is normally a good idea to restrict the discovery process just to the -master-eligible nodes in the cluster. This plugin allows you to identify these -nodes by certain criteria including their tags, their membership of security -groups, and their placement within availability zones. The discovery process -will work correctly even if it finds master-ineligible nodes, but master -elections will be more efficient if this can be avoided. - -The interaction with the AWS API can be authenticated using the -https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html[instance -role], or else custom credentials can be supplied. - -===== Enabling EC2 discovery - -To enable EC2 discovery, configure {es} to use the `ec2` seed hosts provider: - -[source,yaml] ----- -discovery.seed_providers: ec2 ----- - -===== Configuring EC2 discovery - -EC2 discovery supports a number of settings. Some settings are sensitive and -must be stored in the {ref}/secure-settings.html[{es} keystore]. For example, -to authenticate using a particular access key and secret key, add these keys to -the keystore by running the following commands: - -[source,sh] ----- -bin/elasticsearch-keystore add discovery.ec2.access_key -bin/elasticsearch-keystore add discovery.ec2.secret_key ----- - -The available settings for the EC2 discovery plugin are as follows. - -`discovery.ec2.access_key` ({ref}/secure-settings.html[Secure], {ref}/secure-settings.html#reloadable-secure-settings[reloadable]):: - - An EC2 access key. If set, you must also set `discovery.ec2.secret_key`. - If unset, `discovery-ec2` will instead use the instance role. This setting - is sensitive and must be stored in the {es} keystore. - -`discovery.ec2.secret_key` ({ref}/secure-settings.html[Secure], {ref}/secure-settings.html#reloadable-secure-settings[reloadable]):: - - An EC2 secret key. If set, you must also set `discovery.ec2.access_key`. - This setting is sensitive and must be stored in the {es} keystore. - -`discovery.ec2.session_token` ({ref}/secure-settings.html[Secure], {ref}/secure-settings.html#reloadable-secure-settings[reloadable]):: - - An EC2 session token. If set, you must also set `discovery.ec2.access_key` - and `discovery.ec2.secret_key`. This setting is sensitive and must be - stored in the {es} keystore. - -`discovery.ec2.endpoint`:: - - The EC2 service endpoint to which to connect. See - https://docs.aws.amazon.com/general/latest/gr/rande.html#ec2_region to find - the appropriate endpoint for the region. This setting defaults to - `ec2.us-east-1.amazonaws.com` which is appropriate for clusters running in - the `us-east-1` region. - -`discovery.ec2.protocol`:: - - The protocol to use to connect to the EC2 service endpoint, which may be - either `http` or `https`. Defaults to `https`. - -`discovery.ec2.proxy.host`:: - - The address or host name of an HTTP proxy through which to connect to EC2. - If not set, no proxy is used. 
- -`discovery.ec2.proxy.port`:: - - When the address of an HTTP proxy is given in `discovery.ec2.proxy.host`, - this setting determines the port to use to connect to the proxy. Defaults to - `80`. - -`discovery.ec2.proxy.scheme`:: - - The scheme to use when connecting to the EC2 service endpoint through proxy specified - in `discovery.ec2.proxy.host`. Valid values are `http` or `https`. Defaults to `http`. - -`discovery.ec2.proxy.username` ({ref}/secure-settings.html[Secure], {ref}/secure-settings.html#reloadable-secure-settings[reloadable]):: - - When the address of an HTTP proxy is given in `discovery.ec2.proxy.host`, - this setting determines the username to use to connect to the proxy. When - not set, no username is used. This setting is sensitive and must be stored - in the {es} keystore. - -`discovery.ec2.proxy.password` ({ref}/secure-settings.html[Secure], {ref}/secure-settings.html#reloadable-secure-settings[reloadable]):: - - When the address of an HTTP proxy is given in `discovery.ec2.proxy.host`, - this setting determines the password to use to connect to the proxy. When - not set, no password is used. This setting is sensitive and must be stored - in the {es} keystore. - -`discovery.ec2.read_timeout`:: - - The socket timeout for connections to EC2, - {time-units}[including the units]. For example, a - value of `60s` specifies a 60-second timeout. Defaults to 50 seconds. - -`discovery.ec2.groups`:: - - A list of the names or IDs of the security groups to use for discovery. The - `discovery.ec2.any_group` setting determines the behaviour of this setting. - Defaults to an empty list, meaning that security group membership is - ignored by EC2 discovery. - -`discovery.ec2.any_group`:: - - Defaults to `true`, meaning that instances belonging to _any_ of the - security groups specified in `discovery.ec2.groups` will be used for - discovery. If set to `false`, only instances that belong to _all_ of the - security groups specified in `discovery.ec2.groups` will be used for - discovery. - -`discovery.ec2.host_type`:: - -+ --- - -Each EC2 instance has a number of different addresses that might be suitable -for discovery. This setting allows you to select which of these addresses is -used by the discovery process. It can be set to one of `private_ip`, -`public_ip`, `private_dns`, `public_dns` or `tag:TAGNAME` where `TAGNAME` -refers to a name of a tag. This setting defaults to `private_ip`. - -If you set `discovery.ec2.host_type` to a value of the form `tag:TAGNAME` then -the value of the tag `TAGNAME` attached to each instance will be used as that -instance's address for discovery. Instances which do not have this tag set will -be ignored by the discovery process. - -For example if you tag some EC2 instances with a tag named -`elasticsearch-host-name` and set `host_type: tag:elasticsearch-host-name` then -the `discovery-ec2` plugin will read each instance's host name from the value -of the `elasticsearch-host-name` tag. -https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html[Read more -about EC2 Tags]. - --- - -`discovery.ec2.availability_zones`:: - - A list of the names of the availability zones to use for discovery. The - name of an availability zone is the - https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html[region - code followed by a letter], such as `us-east-1a`. Only instances placed in - one of the given availability zones will be used for discovery. 
- -[[discovery-ec2-filtering]] -`discovery.ec2.tag.TAGNAME`:: - -+ --- - -A list of the values of a tag called `TAGNAME` to use for discovery. If set, -only instances that are tagged with one of the given values will be used for -discovery. For instance, the following settings will only use nodes with a -`role` tag set to `master` and an `environment` tag set to either `dev` or -`staging`. - -[source,yaml] ----- -discovery.ec2.tag.role: master -discovery.ec2.tag.environment: dev,staging ----- - -NOTE: The names of tags used for discovery may only contain ASCII letters, -numbers, hyphens and underscores. In particular you cannot use tags whose name -includes a colon. - --- - -`discovery.ec2.node_cache_time`:: - - Sets the length of time for which the collection of discovered instances is - cached. {es} waits at least this long between requests for discovery - information from the EC2 API. AWS may reject discovery requests if they are - made too often, and this would cause discovery to fail. Defaults to `10s`. - -All **secure** settings of this plugin are -{ref}/secure-settings.html#reloadable-secure-settings[reloadable], allowing you -to update the secure settings for this plugin without needing to restart each -node. - - -[[discovery-ec2-permissions]] -===== Recommended EC2 permissions - -The `discovery-ec2` plugin works by making a `DescribeInstances` call to the AWS -EC2 API. You must configure your AWS account to allow this, which is normally -done using an IAM policy. You can create a custom policy via the IAM Management -Console. It should look similar to this. - -[source,js] ----- -{ - "Statement": [ - { - "Action": [ - "ec2:DescribeInstances" - ], - "Effect": "Allow", - "Resource": [ - "*" - ] - } - ], - "Version": "2012-10-17" -} ----- -// NOTCONSOLE - -[[discovery-ec2-attributes]] -===== Automatic node attributes - -The `discovery-ec2` plugin can automatically set the `aws_availability_zone` -node attribute to the availability zone of each node. This node attribute -allows you to ensure that each shard has copies allocated redundantly across -multiple availability zones by using the -{ref}/shard-allocation-awareness.html#[Allocation Awareness] -feature. - -In order to enable the automatic definition of the `aws_availability_zone` -attribute, set `cloud.node.auto_attributes` to `true`. For example: - -[source,yaml] ----- -cloud.node.auto_attributes: true -cluster.routing.allocation.awareness.attributes: aws_availability_zone ----- - -The `aws_availability_zone` attribute can be automatically set like this when -using any discovery type. It is not necessary to set `discovery.seed_providers: -ec2`. However this feature does require that the `discovery-ec2` plugin is -installed. - -[[discovery-ec2-network-host]] -===== Binding to the correct address - -It is important to define `network.host` correctly when deploying a cluster on -EC2. By default each {es} node only binds to `localhost`, which will prevent it -from being discovered by nodes running on any other instances. - -You can use the {ref}/modules-network.html[core network host settings] to bind -each node to the desired address, or you can set `network.host` to one of the -following EC2-specific settings provided by the `discovery-ec2` plugin: - -[cols="<,<",options="header",] -|================================================================== -|EC2 Host Value |Description -|`_ec2:privateIpv4_` |The private IP address (ipv4) of the machine. -|`_ec2:privateDns_` |The private host of the machine. 
-|`_ec2:publicIpv4_` |The public IP address (ipv4) of the machine. -|`_ec2:publicDns_` |The public host of the machine. -|`_ec2:privateIp_` |Equivalent to `_ec2:privateIpv4_`. -|`_ec2:publicIp_` |Equivalent to `_ec2:publicIpv4_`. -|`_ec2_` |Equivalent to `_ec2:privateIpv4_`. -|================================================================== - -These values are acceptable when using any discovery type. They do not require -you to set `discovery.seed_providers: ec2`. However they do require that the -`discovery-ec2` plugin is installed. - -[[cloud-aws-best-practices]] -==== Best Practices in AWS - -This section contains some other information about designing and managing an -{es} cluster on your own AWS infrastructure. If you would prefer to avoid these -operational details then you may be interested in a hosted {es} installation -available on AWS-based infrastructure from https://www.elastic.co/cloud. - -===== Storage - -EC2 instances offer a number of different kinds of storage. Please be aware of -the following when selecting the storage for your cluster: - -* https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html[Instance -Store] is recommended for {es} clusters as it offers excellent performance and -is cheaper than EBS-based storage. {es} is designed to work well with this kind -of ephemeral storage because it replicates each shard across multiple nodes. If -a node fails and its Instance Store is lost then {es} will rebuild any lost -shards from other copies. - -* https://aws.amazon.com/ebs/[EBS-based storage] may be acceptable -for smaller clusters (1-2 nodes). Be sure to use provisioned IOPS to ensure -your cluster has satisfactory performance. - -* https://aws.amazon.com/efs/[EFS-based storage] is not -recommended or supported as it does not offer satisfactory performance. -Historically, shared network filesystems such as EFS have not always offered -precisely the behaviour that {es} requires of its filesystem, and this has been -known to lead to index corruption. Although EFS offers durability, shared -storage, and the ability to grow and shrink filesystems dynamically, you can -achieve the same benefits using {es} directly. - -===== Choice of AMI - -Prefer the https://aws.amazon.com/amazon-linux-2/[Amazon Linux 2 AMIs] as these -allow you to benefit from the lightweight nature, support, and EC2-specific -performance enhancements that these images offer. - -===== Networking - -* Smaller instance types have limited network performance, in terms of both -https://lab.getbase.com/how-we-discovered-limitations-on-the-aws-tcp-stack/[bandwidth -and number of connections]. If networking is a bottleneck, avoid -https://aws.amazon.com/ec2/instance-types/[instance types] with networking -labelled as `Moderate` or `Low`. - -* It is a good idea to distribute your nodes across multiple -https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html[availability -zones] and use {ref}/shard-allocation-awareness.html[shard -allocation awareness] to ensure that each shard has copies in more than one -availability zone. - -* Do not span a cluster across regions. {es} expects that node-to-node -connections within a cluster are reasonably reliable and offer high bandwidth -and low latency, and these properties do not hold for connections between -regions. Although an {es} cluster will behave correctly when node-to-node -connections are unreliable or slow, it is not optimised for this case and its -performance may suffer. 
If you wish to geographically distribute your data, you -should provision multiple clusters and use features such as -{ref}/modules-cross-cluster-search.html[cross-cluster search] and -{ref}/xpack-ccr.html[cross-cluster replication]. - -===== Other recommendations - -* If you have split your nodes into roles, consider -https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html[tagging the -EC2 instances] by role to make it easier to filter and view your EC2 instances -in the AWS console. - -* Consider -https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/terminating-instances.html#Using_ChangingDisableAPITermination[enabling -termination protection] for all of your data and master-eligible nodes. This -will help to prevent accidental termination of these nodes which could -temporarily reduce the resilience of the cluster and which could cause a -potentially disruptive reallocation of shards. - -* If running your cluster using one or more -https://docs.aws.amazon.com/autoscaling/ec2/userguide/AutoScalingGroup.html[auto-scaling -groups], consider protecting your data and master-eligible nodes -https://docs.aws.amazon.com/autoscaling/ec2/userguide/as-instance-termination.html#instance-protection-instance[against -termination during scale-in]. This will help to prevent automatic termination -of these nodes which could temporarily reduce the resilience of the cluster and -which could cause a potentially disruptive reallocation of shards. If these -instances are protected against termination during scale-in then you can use -{ref}/shard-allocation-filtering.html[shard allocation filtering] to gracefully -migrate any data off these nodes before terminating them manually. diff --git a/docs/plugins/discovery-gce.asciidoc b/docs/plugins/discovery-gce.asciidoc deleted file mode 100644 index 0a2629b7f094b..0000000000000 --- a/docs/plugins/discovery-gce.asciidoc +++ /dev/null @@ -1,488 +0,0 @@ -[[discovery-gce]] -=== GCE Discovery plugin - -The Google Compute Engine Discovery plugin uses the GCE API to identify the -addresses of seed hosts. - -:plugin_name: discovery-gce -include::install_remove.asciidoc[] - -[[discovery-gce-usage]] -==== GCE Virtual Machine discovery - -Google Compute Engine VM discovery allows to use the google APIs to perform -automatic discovery of seed hosts. Here is a simple sample configuration: - -[source,yaml] --------------------------------------------------- -cloud: - gce: - project_id: - zone: -discovery: - seed_providers: gce --------------------------------------------------- - -The following gce settings (prefixed with `cloud.gce`) are supported: - - `project_id`:: - - Your Google project id. - By default the project id will be derived from the instance metadata. - - Note: Deriving the project id from system properties or environment variables - (`GOOGLE_CLOUD_PROJECT` or `GCLOUD_PROJECT`) is not supported. - - `zone`:: - - helps to retrieve instances running in a given zone. - It should be one of the https://developers.google.com/compute/docs/zones#available[GCE supported zones]. - By default the zone will be derived from the instance metadata. - See also <>. - - `retry`:: - - If set to `true`, client will use - https://developers.google.com/api-client-library/java/google-http-java-client/backoff[ExponentialBackOff] - policy to retry the failed http request. Defaults to `true`. - - `max_wait`:: - - The maximum elapsed time after the client instantiating retry. If the time elapsed goes past the - `max_wait`, client stops to retry. 
A negative value means that it will wait indefinitely. Defaults to `0s` (retry - indefinitely). - - `refresh_interval`:: - - How long the list of hosts is cached to prevent further requests to the GCE API. `0s` disables caching. - A negative value will cause infinite caching. Defaults to `0s`. - - -[IMPORTANT] -.Binding the network host -============================================== - -It's important to define `network.host` as by default it's bound to `localhost`. - -You can use {ref}/modules-network.html[core network host settings] or -<>: - -============================================== - -[[discovery-gce-network-host]] -==== GCE Network Host - -When the `discovery-gce` plugin is installed, the following are also allowed -as valid network host settings: - -[cols="<,<",options="header",] -|================================================================== -|GCE Host Value |Description -|`_gce:privateIp:X_` |The private IP address of the machine for a given network interface. -|`_gce:hostname_` |The hostname of the machine. -|`_gce_` |Same as `_gce:privateIp:0_` (recommended). -|================================================================== - -Examples: - -[source,yaml] --------------------------------------------------- -# get the IP address from network interface 1 -network.host: _gce:privateIp:1_ -# Using GCE internal hostname -network.host: _gce:hostname_ -# shortcut for _gce:privateIp:0_ (recommended) -network.host: _gce_ --------------------------------------------------- - -[[discovery-gce-usage-short]] -===== How to start (short story) - -* Create Google Compute Engine instance (with compute rw permissions) -* Install Elasticsearch -* Install Google Compute Engine Cloud plugin -* Modify `elasticsearch.yml` file -* Start Elasticsearch - -[[discovery-gce-usage-long]] -==== Setting up GCE Discovery - - -[[discovery-gce-usage-long-prerequisites]] -===== Prerequisites - -Before starting, you need: - -* Your project ID, e.g. `es-cloud`. Get it from https://code.google.com/apis/console/[Google API Console]. -* To install https://developers.google.com/cloud/sdk/[Google Cloud SDK] - -If you did not set it yet, you can define your default project you will work on: - -[source,sh] --------------------------------------------------- -gcloud config set project es-cloud --------------------------------------------------- - -[[discovery-gce-usage-long-login]] -===== Login to Google Cloud - -If you haven't already, login to Google Cloud - -[source,sh] --------------------------------------------------- -gcloud auth login --------------------------------------------------- - -This will open your browser. You will be asked to sign-in to a Google account and -authorize access to the Google Cloud SDK. - -[[discovery-gce-usage-long-first-instance]] -===== Creating your first instance - - -[source,sh] --------------------------------------------------- -gcloud compute instances create myesnode1 \ - --zone \ - --scopes compute-rw --------------------------------------------------- - -When done, a report like this one should appears: - -[source,text] --------------------------------------------------- -Created [https://www.googleapis.com/compute/v1/projects/es-cloud-1070/zones/us-central1-f/instances/myesnode1]. 
-NAME ZONE MACHINE_TYPE PREEMPTIBLE INTERNAL_IP EXTERNAL_IP STATUS -myesnode1 us-central1-f n1-standard-1 10.240.133.54 104.197.94.25 RUNNING --------------------------------------------------- - -You can now connect to your instance: - -[source,sh] --------------------------------------------------- -# Connect using google cloud SDK -gcloud compute ssh myesnode1 --zone europe-west1-a - -# Or using SSH with external IP address -ssh -i ~/.ssh/google_compute_engine 192.158.29.199 --------------------------------------------------- - -[IMPORTANT] -.Service Account Permissions -============================================== - -It's important when creating an instance that the correct permissions are set. At a minimum, you must ensure you have: - -[source,text] --------------------------------------------------- -scopes=compute-rw --------------------------------------------------- - -Failing to set this will result in unauthorized messages when starting Elasticsearch. -See <>. -============================================== - -Once connected, {ref}/install-elasticsearch.html[install {es}]. - -[[discovery-gce-usage-long-install-plugin]] -===== Install Elasticsearch discovery gce plugin - -Install the plugin: - -[source,sh] --------------------------------------------------- -# Use Plugin Manager to install it -sudo bin/elasticsearch-plugin install discovery-gce --------------------------------------------------- - -Open the `elasticsearch.yml` file: - -[source,sh] --------------------------------------------------- -sudo vi /etc/elasticsearch/elasticsearch.yml --------------------------------------------------- - -And add the following lines: - -[source,yaml] --------------------------------------------------- -cloud: - gce: - project_id: es-cloud - zone: europe-west1-a -discovery: - seed_providers: gce --------------------------------------------------- - - -Start Elasticsearch: - -[source,sh] --------------------------------------------------- -sudo systemctl start elasticsearch --------------------------------------------------- - -If anything goes wrong, you should check logs: - -[source,sh] --------------------------------------------------- -tail -f /var/log/elasticsearch/elasticsearch.log --------------------------------------------------- - -If needed, you can change log level to `trace` by opening `log4j2.properties`: - -[source,sh] --------------------------------------------------- -sudo vi /etc/elasticsearch/log4j2.properties --------------------------------------------------- - -and adding the following line: - -[source,yaml] --------------------------------------------------- -# discovery -logger.discovery_gce.name = discovery.gce -logger.discovery_gce.level = trace --------------------------------------------------- - - - -[[discovery-gce-usage-cloning]] -==== Cloning your existing machine - -In order to build a cluster on many nodes, you can clone your configured instance to new nodes. -You won't have to reinstall everything! 
- -First create an image of your running instance and upload it to Google Cloud Storage: - -[source,sh] --------------------------------------------------- -# Create an image of your current instance -sudo /usr/bin/gcimagebundle -d /dev/sda -o /tmp/ - -# An image has been created in `/tmp` directory: -ls /tmp -e4686d7f5bf904a924ae0cfeb58d0827c6d5b966.image.tar.gz - -# Upload your image to Google Cloud Storage: -# Create a bucket to hold your image, let's say `esimage`: -gsutil mb gs://esimage - -# Copy your image to this bucket: -gsutil cp /tmp/e4686d7f5bf904a924ae0cfeb58d0827c6d5b966.image.tar.gz gs://esimage - -# Then add your image to images collection: -gcloud compute images create elasticsearch-2-0-0 --source-uri gs://esimage/e4686d7f5bf904a924ae0cfeb58d0827c6d5b966.image.tar.gz - -# If the previous command did not work for you, logout from your instance -# and launch the same command from your local machine. --------------------------------------------------- - -[[discovery-gce-usage-start-new-instances]] -===== Start new instances - -As you have now an image, you can create as many instances as you need: - -[source,sh] --------------------------------------------------- -# Just change node name (here myesnode2) -gcloud compute instances create myesnode2 --image elasticsearch-2-0-0 --zone europe-west1-a - -# If you want to provide all details directly, you can use: -gcloud compute instances create myesnode2 --image=elasticsearch-2-0-0 \ - --zone europe-west1-a --machine-type f1-micro --scopes=compute-rw --------------------------------------------------- - -[[discovery-gce-usage-remove-instance]] -===== Remove an instance (aka shut it down) - -You can use https://cloud.google.com/console[Google Cloud Console] or CLI to manage your instances: - -[source,sh] --------------------------------------------------- -# Stopping and removing instances -gcloud compute instances delete myesnode1 myesnode2 \ - --zone=europe-west1-a - -# Consider removing disk as well if you don't need them anymore -gcloud compute disks delete boot-myesnode1 boot-myesnode2 \ - --zone=europe-west1-a --------------------------------------------------- - -[[discovery-gce-usage-zones]] -==== Using GCE zones - -`cloud.gce.zone` helps to retrieve instances running in a given zone. It should be one of the -https://developers.google.com/compute/docs/zones#available[GCE supported zones]. - -The GCE discovery can support multi zones although you need to be aware of network latency between zones. -To enable discovery across more than one zone, just enter add your zone list to `cloud.gce.zone` setting: - -[source,yaml] --------------------------------------------------- -cloud: - gce: - project_id: - zone: ["", ""] -discovery: - seed_providers: gce --------------------------------------------------- - - - -[[discovery-gce-usage-tags]] -==== Filtering by tags - -The GCE discovery can also filter machines to include in the cluster based on tags using `discovery.gce.tags` settings. -For example, setting `discovery.gce.tags` to `dev` will only filter instances having a tag set to `dev`. Several tags -set will require all of those tags to be set for the instance to be included. - -One practical use for tag filtering is when a GCE cluster contains many nodes -that are not master-eligible {es} nodes. In this case, tagging the GCE -instances that _are_ running the master-eligible {es} nodes, and then filtering -by that tag, will help discovery to run more efficiently. 
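For that master-eligible-only use case, a minimal sketch of the node configuration might look like the following, assuming the master-eligible instances were created with an illustrative `es-master` network tag:

[source,yaml]
----
# Only instances carrying the (illustrative) es-master tag are used for discovery
discovery:
  seed_providers: gce
  gce:
    tags: es-master
----

The general workflow for adding a tag to an instance and filtering on it is shown below.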
- -Add your tag when building the new instance: - -[source,sh] --------------------------------------------------- -gcloud compute instances create myesnode1 --project=es-cloud \ - --scopes=compute-rw \ - --tags=elasticsearch,dev --------------------------------------------------- - -Then, define it in `elasticsearch.yml`: - -[source,yaml] --------------------------------------------------- -cloud: - gce: - project_id: es-cloud - zone: europe-west1-a -discovery: - seed_providers: gce - gce: - tags: elasticsearch, dev --------------------------------------------------- - -[[discovery-gce-usage-port]] -==== Changing default transport port - -By default, Elasticsearch GCE plugin assumes that you run Elasticsearch on 9300 default port. -But you can specify the port value Elasticsearch is meant to use using google compute engine metadata `es_port`: - -[[discovery-gce-usage-port-create]] -===== When creating instance - -Add `--metadata es_port=9301` option: - -[source,sh] --------------------------------------------------- -# when creating first instance -gcloud compute instances create myesnode1 \ - --scopes=compute-rw,storage-full \ - --metadata es_port=9301 - -# when creating an instance from an image -gcloud compute instances create myesnode2 --image=elasticsearch-1-0-0-RC1 \ - --zone europe-west1-a --machine-type f1-micro --scopes=compute-rw \ - --metadata es_port=9301 --------------------------------------------------- - -[[discovery-gce-usage-port-run]] -===== On a running instance - -[source,sh] --------------------------------------------------- -gcloud compute instances add-metadata myesnode1 \ - --zone europe-west1-a \ - --metadata es_port=9301 --------------------------------------------------- - - -[[discovery-gce-usage-tips]] -==== GCE Tips - -[[discovery-gce-usage-tips-projectid]] -===== Store project id locally - -If you don't want to repeat the project id each time, you can save it in the local gcloud config - -[source,sh] --------------------------------------------------- -gcloud config set project es-cloud --------------------------------------------------- - -[[discovery-gce-usage-tips-permissions]] -===== Machine Permissions - -If you have created a machine without the correct permissions, you will see `403 unauthorized` error messages. To change machine permission on an existing instance, first stop the instance then Edit. Scroll down to `Access Scopes` to change permission. The other way to alter these permissions is to delete the instance (NOT THE DISK). Then create another with the correct permissions. - -Creating machines with gcloud:: -+ --- -Ensure the following flags are set: - -[source,text] --------------------------------------------------- ---scopes=compute-rw --------------------------------------------------- --- - -Creating with console (web):: -+ --- -When creating an instance using the web console, scroll down to **Identity and API access**. - -Select a service account with the correct permissions or choose **Compute Engine default service account** and select **Allow default access** for **Access scopes**. 
--- - -Creating with knife google:: -+ --- -Set the service account scopes when creating the machine: - -[source,sh] --------------------------------------------------- -knife google server create www1 \ - -m n1-standard-1 \ - -I debian-8 \ - -Z us-central1-a \ - -i ~/.ssh/id_rsa \ - -x jdoe \ - --gce-service-account-scopes https://www.googleapis.com/auth/compute --------------------------------------------------- - -Or, you may use the alias: - -[source,sh] --------------------------------------------------- - --gce-service-account-scopes compute-rw --------------------------------------------------- --- - -[[discovery-gce-usage-testing]] -==== Testing GCE - -Integrations tests in this plugin require working GCE configuration and -therefore disabled by default. To enable tests prepare a config file -elasticsearch.yml with the following content: - -[source,yaml] --------------------------------------------------- -cloud: - gce: - project_id: es-cloud - zone: europe-west1-a -discovery: - seed_providers: gce --------------------------------------------------- - -Replace `project_id` and `zone` with your settings. - -To run test: - -[source,sh] --------------------------------------------------- -mvn -Dtests.gce=true -Dtests.config=/path/to/config/file/elasticsearch.yml clean test --------------------------------------------------- diff --git a/docs/plugins/discovery.asciidoc b/docs/plugins/discovery.asciidoc deleted file mode 100644 index 5cd08d98907f0..0000000000000 --- a/docs/plugins/discovery.asciidoc +++ /dev/null @@ -1,32 +0,0 @@ -[[discovery]] -== Discovery plugins - -Discovery plugins extend Elasticsearch by adding new seed hosts providers that -can be used to extend the {ref}/modules-discovery.html[cluster formation -module]. - -[discrete] -==== Core discovery plugins - -The core discovery plugins are: - -<>:: - -The EC2 discovery plugin uses the https://github.com/aws/aws-sdk-java[AWS API] -to identify the addresses of seed hosts. - -<>:: - -The Azure Classic discovery plugin uses the Azure Classic API to identify the -addresses of seed hosts. - -<>:: - -The Google Compute Engine discovery plugin uses the GCE API to identify the -addresses of seed hosts. - -include::discovery-ec2.asciidoc[] - -include::discovery-azure-classic.asciidoc[] - -include::discovery-gce.asciidoc[] diff --git a/docs/plugins/index.asciidoc b/docs/plugins/index.asciidoc deleted file mode 100644 index e1b0b171bb1fa..0000000000000 --- a/docs/plugins/index.asciidoc +++ /dev/null @@ -1,54 +0,0 @@ -= Elasticsearch Plugins and Integrations - -include::../Versions.asciidoc[] - -[[intro]] -== Introduction to plugins - -Plugins are a way to enhance the core Elasticsearch functionality in a custom -manner. They range from adding custom mapping types, custom analyzers, native -scripts, custom discovery and more. - -Plugins contain JAR files, but may also contain scripts and config files, and -must be installed on every node in the cluster. After installation, each -node must be restarted before the plugin becomes visible. - -NOTE: A full cluster restart is required for installing plugins that have -custom cluster state metadata. It is still possible to upgrade -such plugins with a rolling restart. - -This documentation distinguishes two categories of plugins: - -Core Plugins:: This category identifies plugins that are part of Elasticsearch -project. Delivered at the same time as Elasticsearch, their version number always -matches the version number of Elasticsearch itself. 
These plugins are maintained -by the Elastic team with the appreciated help of amazing community members (for -open source plugins). Issues and bug reports can be reported on the -https://github.com/elastic/elasticsearch[Github project page]. - -Community contributed:: This category identifies plugins that are external to -the Elasticsearch project. They are provided by individual developers or private -companies and have their own licenses as well as their own versioning system. -Issues and bug reports can usually be reported on the community plugin's web site. - -For advice on writing your own plugin, refer to <>. - -include::plugin-script.asciidoc[] - -include::api.asciidoc[] - -include::analysis.asciidoc[] - -include::discovery.asciidoc[] - -include::mapper.asciidoc[] - -include::repository.asciidoc[] - -include::store.asciidoc[] - -include::integrations.asciidoc[] - -include::authors.asciidoc[] - -include::redirects.asciidoc[] diff --git a/docs/plugins/ingest-user-agent.asciidoc b/docs/plugins/ingest-user-agent.asciidoc deleted file mode 100644 index 51bfe7376c48a..0000000000000 --- a/docs/plugins/ingest-user-agent.asciidoc +++ /dev/null @@ -1,7 +0,0 @@ -[[ingest-user-agent]] -=== Ingest `user_agent` Processor Plugin - -The `user_agent` processor is no longer distributed as a plugin, but is now a module -distributed by default with Elasticsearch. See the -{ref}/user-agent-processor.html[User Agent processor] for more details. - diff --git a/docs/plugins/install_remove.asciidoc b/docs/plugins/install_remove.asciidoc deleted file mode 100644 index 893af4dac42ff..0000000000000 --- a/docs/plugins/install_remove.asciidoc +++ /dev/null @@ -1,39 +0,0 @@ -[discrete] -[id="{plugin_name}-install"] -==== Installation - -ifeval::["{release-state}"=="unreleased"] - -WARNING: Version {version} of the Elastic Stack has not yet been released. The plugin might not be available. - -endif::[] - -This plugin can be installed using the plugin manager: - -["source","sh",subs="attributes,callouts"] ----------------------------------------------------------------- -sudo bin/elasticsearch-plugin install {plugin_name} ----------------------------------------------------------------- - -The plugin must be installed on every node in the cluster, and each node must -be restarted after installation. - -You can download this plugin for <> from {plugin_url}/{plugin_name}/{plugin_name}-{version}.zip. To verify -the `.zip` file, use the -{plugin_url}/{plugin_name}/{plugin_name}-{version}.zip.sha512[SHA hash] or -{plugin_url}/{plugin_name}/{plugin_name}-{version}.zip.asc[ASC key]. - -[discrete] -[id="{plugin_name}-remove"] -==== Removal - -The plugin can be removed with the following command: - -["source","sh",subs="attributes,callouts"] ----------------------------------------------------------------- -sudo bin/elasticsearch-plugin remove {plugin_name} ----------------------------------------------------------------- - -The node must be stopped before removing the plugin. - diff --git a/docs/plugins/integrations.asciidoc b/docs/plugins/integrations.asciidoc deleted file mode 100644 index aff4aed0becd2..0000000000000 --- a/docs/plugins/integrations.asciidoc +++ /dev/null @@ -1,164 +0,0 @@ -[[integrations]] - -== Integrations - -Integrations are not plugins, but are external tools or modules that make it easier to work with Elasticsearch. 
- -[discrete] -[[cms-integrations]] -=== CMS integrations - -[discrete] -==== Supported by the community: - -* https://wordpress.org/plugins/elasticpress/[ElasticPress]: - Elasticsearch WordPress Plugin - -* https://doc.tiki.org/Elasticsearch[Tiki Wiki CMS Groupware]: - Tiki has native support for Elasticsearch. This provides faster & better - search (facets, etc), along with some Natural Language Processing features - (ex.: More like this) - -* https://extensions.xwiki.org/xwiki/bin/view/Extension/Elastic+Search+Macro/[XWiki Next Generation Wiki]: - XWiki has an Elasticsearch and Kibana macro allowing to run Elasticsearch queries and display the results in XWiki pages using XWiki's scripting language as well as include Kibana Widgets in XWiki pages - -[discrete] -==== Supported by Elastic: - -* {logstash-ref}/plugins-outputs-elasticsearch.html[Logstash output to Elasticsearch]: - The Logstash `elasticsearch` output plugin. -* {logstash-ref}/plugins-inputs-elasticsearch.html[Elasticsearch input to Logstash] - The Logstash `elasticsearch` input plugin. -* {logstash-ref}/plugins-filters-elasticsearch.html[Elasticsearch event filtering in Logstash] - The Logstash `elasticsearch` filter plugin. -* {logstash-ref}/plugins-codecs-es_bulk.html[Elasticsearch bulk codec] - The Logstash `es_bulk` plugin decodes the Elasticsearch bulk format into individual events. - -[discrete] -==== Supported by the community: - -* https://github.com/spinscale/cookiecutter-elasticsearch-ingest-processor[Ingest processor template]: - A template for creating new ingest processors. - -* https://github.com/BigDataDevs/kafka-elasticsearch-consumer[Kafka Standalone Consumer (Indexer)]: - Kafka Standalone Consumer [Indexer] will read messages from Kafka in batches, processes(as implemented) and bulk-indexes them into Elasticsearch. Flexible and scalable. More documentation in above GitHub repo's Wiki. - -* https://github.com/Aconex/scrutineer[Scrutineer]: - A high performance consistency checker to compare what you've indexed - with your source of truth content (e.g. DB) - -* https://github.com/dadoonet/fscrawler[FS Crawler]: - The File System (FS) crawler allows to index documents (PDF, Open Office...) from your local file system and over SSH. (by David Pilato) - -* https://github.com/senacor/elasticsearch-evolution[Elasticsearch Evolution]: - A library to migrate elasticsearch mappings. - -* https://pgsync.com[PGSync]: - A tool for syncing data from Postgres to Elasticsearch. - -[discrete] -[[deployment]] -=== Deployment - -[discrete] -==== Supported by the community: -* https://github.com/elastic/ansible-elasticsearch[Ansible]: - Ansible playbook for Elasticsearch. - -* https://github.com/elastic/puppet-elasticsearch[Puppet]: - Elasticsearch puppet module. - -* https://github.com/elastic/cookbook-elasticsearch[Chef]: - Chef cookbook for Elasticsearch - -[discrete] -[[framework-integrations]] -=== Framework integrations - -[discrete] -==== Supported by the community: - -* https://camel.apache.org/components/2.x/elasticsearch-component.html[Apache Camel Integration]: - An Apache camel component to integrate Elasticsearch - -* https://metacpan.org/pod/Catmandu::Store::ElasticSearch[Catmandu]: - An Elasticsearch backend for the Catmandu framework. - -* https://github.com/FriendsOfSymfony/FOSElasticaBundle[FOSElasticaBundle]: - Symfony2 Bundle wrapping Elastica. 
- -* https://plugins.grails.org/plugin/puneetbehl/elasticsearch[Grails]: - Elasticsearch Grails plugin. - -* https://hibernate.org/search/[Hibernate Search] - Integration with Hibernate ORM, from the Hibernate team. Automatic synchronization of write operations, yet exposes full Elasticsearch capabilities for queries. Can return either Elasticsearch native or re-map queries back into managed entities loaded within transactions from the reference database. - -* https://github.com/spring-projects/spring-data-elasticsearch[Spring Data Elasticsearch]: - Spring Data implementation for Elasticsearch - -* https://github.com/dadoonet/spring-elasticsearch[Spring Elasticsearch]: - Spring Factory for Elasticsearch - -* https://zeebe.io[Zeebe]: - An Elasticsearch exporter acts as a bridge between Zeebe and Elasticsearch - -* https://pulsar.apache.org/docs/en/io-elasticsearch[Apache Pulsar]: - The Elasticsearch Sink Connector is used to pull messages from Pulsar topics - and persist the messages to an index. - -* https://micronaut-projects.github.io/micronaut-elasticsearch/latest/guide/index.html[Micronaut Elasticsearch Integration]: - Integration of Micronaut with Elasticsearch - -* https://streampipes.apache.org[Apache StreamPipes]: - StreamPipes is a framework that enables users to work with IoT data sources. - -* https://metamodel.apache.org/[Apache MetaModel]: - Providing a common interface for discovery, exploration of metadata and querying of different types of data sources. - -* https://micrometer.io[Micrometer]: - Vendor-neutral application metrics facade. Think SLF4j, but for metrics. - -[discrete] -[[hadoop-integrations]] -=== Hadoop integrations - -[discrete] -==== Supported by Elastic: - -* link:/guide/en/elasticsearch/hadoop/current/[es-hadoop]: Elasticsearch real-time - search and analytics natively integrated with Hadoop. Supports Map/Reduce, - Cascading, Apache Hive, Apache Pig, Apache Spark and Apache Storm. - -[discrete] -==== Supported by the community: - -* https://github.com/criteo/garmadon[Garmadon]: - Garmadon is a solution for Hadoop Cluster realtime introspection. - - -[discrete] -[[monitoring-integrations]] -=== Health and Performance Monitoring - -[discrete] -==== Supported by the community: - -* https://sematext.com/spm/index.html[SPM for Elasticsearch]: - Performance monitoring with live charts showing cluster and node stats, integrated - alerts, email reports, etc. -* https://www.zabbix.com/integrations/elasticsearch[Zabbix monitoring template]: - Monitor the performance and status of your {es} nodes and cluster with Zabbix - and receive events information. - -[[other-integrations]] -[discrete] -=== Other integrations - -[discrete] -==== Supported by the community: - -* https://www.wireshark.org/[Wireshark]: - Protocol dissection for HTTP and the transport protocol - -* https://www.itemsapi.com/[ItemsAPI]: - Search backend for mobile and web diff --git a/docs/plugins/mapper-annotated-text.asciidoc b/docs/plugins/mapper-annotated-text.asciidoc deleted file mode 100644 index 956b6bedffff1..0000000000000 --- a/docs/plugins/mapper-annotated-text.asciidoc +++ /dev/null @@ -1,432 +0,0 @@ -[[mapper-annotated-text]] -=== Mapper annotated text plugin - -experimental[] - -The mapper-annotated-text plugin provides the ability to index text that is a -combination of free-text and special markup that is typically used to identify -items of interest such as people or organisations (see NER or Named Entity Recognition -tools). 
- - -The elasticsearch markup allows one or more additional tokens to be injected, unchanged, into the token -stream at the same position as the underlying text it annotates. - -:plugin_name: mapper-annotated-text -include::install_remove.asciidoc[] - -[[mapper-annotated-text-usage]] -==== Using the `annotated-text` field - -The `annotated-text` tokenizes text content as per the more common {ref}/text.html[`text`] field (see -"limitations" below) but also injects any marked-up annotation tokens directly into -the search index: - -[source,console] --------------------------- -PUT my-index-000001 -{ - "mappings": { - "properties": { - "my_field": { - "type": "annotated_text" - } - } - } -} --------------------------- - -Such a mapping would allow marked-up text eg wikipedia articles to be indexed as both text -and structured tokens. The annotations use a markdown-like syntax using URL encoding of -one or more values separated by the `&` symbol. - - -We can use the "_analyze" api to test how an example annotation would be stored as tokens -in the search index: - - -[source,js] --------------------------- -GET my-index-000001/_analyze -{ - "field": "my_field", - "text":"Investors in [Apple](Apple+Inc.) rejoiced." -} --------------------------- -// NOTCONSOLE - -Response: - -[source,js] --------------------------------------------------- -{ - "tokens": [ - { - "token": "investors", - "start_offset": 0, - "end_offset": 9, - "type": "", - "position": 0 - }, - { - "token": "in", - "start_offset": 10, - "end_offset": 12, - "type": "", - "position": 1 - }, - { - "token": "Apple Inc.", <1> - "start_offset": 13, - "end_offset": 18, - "type": "annotation", - "position": 2 - }, - { - "token": "apple", - "start_offset": 13, - "end_offset": 18, - "type": "", - "position": 2 - }, - { - "token": "rejoiced", - "start_offset": 19, - "end_offset": 27, - "type": "", - "position": 3 - } - ] -} --------------------------------------------------- -// NOTCONSOLE - -<1> Note the whole annotation token `Apple Inc.` is placed, unchanged as a single token in -the token stream and at the same position (position 2) as the text token (`apple`) it annotates. - - -We can now perform searches for annotations using regular `term` queries that don't tokenize -the provided search values. Annotations are a more precise way of matching as can be seen -in this example where a search for `Beck` will not match `Jeff Beck` : - -[source,console] --------------------------- -# Example documents -PUT my-index-000001/_doc/1 -{ - "my_field": "[Beck](Beck) announced a new tour"<1> -} - -PUT my-index-000001/_doc/2 -{ - "my_field": "[Jeff Beck](Jeff+Beck&Guitarist) plays a strat"<2> -} - -# Example search -GET my-index-000001/_search -{ - "query": { - "term": { - "my_field": "Beck" <3> - } - } -} --------------------------- - -<1> As well as tokenising the plain text into single words e.g. `beck`, here we -inject the single token value `Beck` at the same position as `beck` in the token stream. -<2> Note annotations can inject multiple tokens at the same position - here we inject both -the very specific value `Jeff Beck` and the broader term `Guitarist`. This enables -broader positional queries e.g. finding mentions of a `Guitarist` near to `strat`. 
-<3> A benefit of searching with these carefully defined annotation tokens is that a query for -`Beck` will not match document 2 that contains the tokens `jeff`, `beck` and `Jeff Beck` - -WARNING: Any use of `=` signs in annotation values eg `[Prince](person=Prince)` will -cause the document to be rejected with a parse failure. In future we hope to have a use for -the equals signs so will actively reject documents that contain this today. - -[[annotated-text-synthetic-source]] -===== Synthetic `_source` - -IMPORTANT: Synthetic `_source` is Generally Available only for TSDB indices -(indices that have `index.mode` set to `time_series`). For other indices -synthetic `_source` is in technical preview. Features in technical preview may -be changed or removed in a future release. Elastic will work to fix -any issues, but features in technical preview are not subject to the support SLA -of official GA features. - -If using a sub-`keyword` field then the values are sorted in the same way as -a `keyword` field's values are sorted. By default, that means sorted with -duplicates removed. So: -[source,console,id=synthetic-source-text-example-default] ----- -PUT idx -{ - "settings": { - "index": { - "mapping": { - "source": { - "mode": "synthetic" - } - } - } - }, - "mappings": { - "properties": { - "text": { - "type": "annotated_text", - "fields": { - "raw": { - "type": "keyword" - } - } - } - } - } -} -PUT idx/_doc/1 -{ - "text": [ - "the quick brown fox", - "the quick brown fox", - "jumped over the lazy dog" - ] -} ----- -// TEST[s/$/\nGET idx\/_doc\/1?filter_path=_source\n/] - -Will become: -[source,console-result] ----- -{ - "text": [ - "jumped over the lazy dog", - "the quick brown fox" - ] -} ----- -// TEST[s/^/{"_source":/ s/\n$/}/] - -NOTE: Reordering text fields can have an effect on {ref}/query-dsl-match-query-phrase.html[phrase] -and {ref}/span-queries.html[span] queries. See the discussion about {ref}/position-increment-gap.html[`position_increment_gap`] for more detail. You -can avoid this by making sure the `slop` parameter on the phrase queries -is lower than the `position_increment_gap`. This is the default. - -If the `annotated_text` field sets `store` to true then order and duplicates -are preserved. -[source,console,id=synthetic-source-text-example-stored] ----- -PUT idx -{ - "settings": { - "index": { - "mapping": { - "source": { - "mode": "synthetic" - } - } - } - }, - "mappings": { - "properties": { - "text": { "type": "annotated_text", "store": true } - } - } -} -PUT idx/_doc/1 -{ - "text": [ - "the quick brown fox", - "the quick brown fox", - "jumped over the lazy dog" - ] -} ----- -// TEST[s/$/\nGET idx\/_doc\/1?filter_path=_source\n/] - -Will become: -[source,console-result] ----- -{ - "text": [ - "the quick brown fox", - "the quick brown fox", - "jumped over the lazy dog" - ] -} ----- -// TEST[s/^/{"_source":/ s/\n$/}/] - - -[[mapper-annotated-text-tips]] -==== Data modelling tips -===== Use structured and unstructured fields - -Annotations are normally a way of weaving structured information into unstructured text for -higher-precision search. - -`Entity resolution` is a form of document enrichment undertaken by specialist software or people -where references to entities in a document are disambiguated by attaching a canonical ID. -The ID is used to resolve any number of aliases or distinguish between people with the -same name. The hyperlinks connecting Wikipedia's articles are a good example of resolved -entity IDs woven into text. 
- -These IDs can be embedded as annotations in an annotated_text field but it often makes -sense to include them in dedicated structured fields to support discovery via aggregations: - -[source,console] --------------------------- -PUT my-index-000001 -{ - "mappings": { - "properties": { - "my_unstructured_text_field": { - "type": "annotated_text" - }, - "my_structured_people_field": { - "type": "text", - "fields": { - "keyword" : { - "type": "keyword" - } - } - } - } - } -} --------------------------- - -Applications would then typically provide content and discover it as follows: - -[source,console] --------------------------- -# Example documents -PUT my-index-000001/_doc/1 -{ - "my_unstructured_text_field": "[Shay](%40kimchy) created elasticsearch", - "my_twitter_handles": ["@kimchy"] <1> -} - -GET my-index-000001/_search -{ - "query": { - "query_string": { - "query": "elasticsearch OR logstash OR kibana",<2> - "default_field": "my_unstructured_text_field" - } - }, - "aggregations": { - "top_people" :{ - "significant_terms" : { <3> - "field" : "my_twitter_handles.keyword" - } - } - } -} --------------------------- - -<1> Note the `my_twitter_handles` contains a list of the annotation values -also used in the unstructured text. (Note the annotated_text syntax requires escaping). -By repeating the annotation values in a structured field this application has ensured that -the tokens discovered in the structured field can be used for search and highlighting -in the unstructured field. -<2> In this example we search for documents that talk about components of the elastic stack -<3> We use the `my_twitter_handles` field here to discover people who are significantly -associated with the elastic stack. - -===== Avoiding over-matching annotations -By design, the regular text tokens and the annotation tokens co-exist in the same indexed -field but in rare cases this can lead to some over-matching. - -The value of an annotation often denotes a _named entity_ (a person, place or company). -The tokens for these named entities are inserted untokenized, and differ from typical text -tokens because they are normally: - -* Mixed case e.g. `Madonna` -* Multiple words e.g. `Jeff Beck` -* Can have punctuation or numbers e.g. `Apple Inc.` or `@kimchy` - -This means, for the most part, a search for a named entity in the annotated text field will -not have any false positives e.g. when selecting `Apple Inc.` from an aggregation result -you can drill down to highlight uses in the text without "over matching" on any text tokens -like the word `apple` in this context: - - the apple was very juicy - -However, a problem arises if your named entity happens to be a single term and lower-case e.g. the -company `elastic`. In this case, a search on the annotated text field for the token `elastic` -may match a text document such as this: - - they fired an elastic band - -To avoid such false matches users should consider prefixing annotation values to ensure -they don't name clash with text tokens e.g. 
- - [elastic](Company_elastic) released version 7.0 of the elastic stack today - - - - -[[mapper-annotated-text-highlighter]] -==== Using the `annotated` highlighter - -The `annotated-text` plugin includes a custom highlighter designed to mark up search hits -in a way which is respectful of the original markup: - -[source,console] --------------------------- -# Example documents -PUT my-index-000001/_doc/1 -{ - "my_field": "The cat sat on the [mat](sku3578)" -} - -GET my-index-000001/_search -{ - "query": { - "query_string": { - "query": "cats" - } - }, - "highlight": { - "fields": { - "my_field": { - "type": "annotated", <1> - "require_field_match": false - } - } - } -} --------------------------- - -<1> The `annotated` highlighter type is designed for use with annotated_text fields - -The annotated highlighter is based on the `unified` highlighter and supports the same -settings but does not use the `pre_tags` or `post_tags` parameters. Rather than using -html-like markup such as `cat` the annotated highlighter uses the same -markdown-like syntax used for annotations and injects a key=value annotation where `_hit_term` -is the key and the matched search term is the value e.g. - - The [cat](_hit_term=cat) sat on the [mat](sku3578) - -The annotated highlighter tries to be respectful of any existing markup in the original -text: - -* If the search term matches exactly the location of an existing annotation then the -`_hit_term` key is merged into the url-like syntax used in the `(...)` part of the -existing annotation. -* However, if the search term overlaps the span of an existing annotation it would break -the markup formatting so the original annotation is removed in favour of a new annotation -with just the search hit information in the results. -* Any non-overlapping annotations in the original text are preserved in highlighter -selections - - -[[mapper-annotated-text-limitations]] -==== Limitations - -The annotated_text field type supports the same mapping settings as the `text` field type -but with the following exceptions: - -* No support for `fielddata` or `fielddata_frequency_filter` -* No support for `index_prefixes` or `index_phrases` indexing diff --git a/docs/plugins/mapper-murmur3.asciidoc b/docs/plugins/mapper-murmur3.asciidoc deleted file mode 100644 index 990f20caf9684..0000000000000 --- a/docs/plugins/mapper-murmur3.asciidoc +++ /dev/null @@ -1,73 +0,0 @@ -[[mapper-murmur3]] -=== Mapper murmur3 plugin - -The mapper-murmur3 plugin provides the ability to compute hash of field values -at index-time and store them in the index. This can sometimes be helpful when -running cardinality aggregations on high-cardinality and large string fields. - -:plugin_name: mapper-murmur3 -include::install_remove.asciidoc[] - -[[mapper-murmur3-usage]] -==== Using the `murmur3` field - -The `murmur3` is typically used within a multi-field, so that both the original -value and its hash are stored in the index: - -[source,console] --------------------------- -PUT my-index-000001 -{ - "mappings": { - "properties": { - "my_field": { - "type": "keyword", - "fields": { - "hash": { - "type": "murmur3" - } - } - } - } - } -} --------------------------- - -Such a mapping would allow to refer to `my_field.hash` in order to get hashes -of the values of the `my_field` field. 
This is only useful in order to run -`cardinality` aggregations: - -[source,console] --------------------------- -# Example documents -PUT my-index-000001/_doc/1 -{ - "my_field": "This is a document" -} - -PUT my-index-000001/_doc/2 -{ - "my_field": "This is another document" -} - -GET my-index-000001/_search -{ - "aggs": { - "my_field_cardinality": { - "cardinality": { - "field": "my_field.hash" <1> - } - } - } -} --------------------------- - -<1> Counting unique values on the `my_field.hash` field - -Running a `cardinality` aggregation on the `my_field` field directly would -yield the same result, however using `my_field.hash` instead might result in -a speed-up if the field has a high-cardinality. On the other hand, it is -discouraged to use the `murmur3` field on numeric fields and string fields -that are not almost unique as the use of a `murmur3` field is unlikely to -bring significant speed-ups, while increasing the amount of disk space required -to store the index. diff --git a/docs/plugins/mapper-size.asciidoc b/docs/plugins/mapper-size.asciidoc deleted file mode 100644 index c9682ae3d06eb..0000000000000 --- a/docs/plugins/mapper-size.asciidoc +++ /dev/null @@ -1,94 +0,0 @@ -[[mapper-size]] -=== Mapper size plugin - -The mapper-size plugin provides the `_size` metadata field which, when enabled, -indexes the size in bytes of the original -{ref}/mapping-source-field.html[`_source`] field. - -:plugin_name: mapper-size -include::install_remove.asciidoc[] - -[[mapper-size-usage]] -==== Using the `_size` field - -In order to enable the `_size` field, set the mapping as follows: - -[source,console] --------------------------- -PUT my-index-000001 -{ - "mappings": { - "_size": { - "enabled": true - } - } -} --------------------------- - -The value of the `_size` field is accessible in queries, aggregations, scripts, -and when sorting. It can be retrieved using the {ref}/search-fields.html#search-fields-param[fields parameter]: - -[source,console] --------------------------- -# Example documents -PUT my-index-000001/_doc/1 -{ - "text": "This is a document" -} - -PUT my-index-000001/_doc/2 -{ - "text": "This is another document" -} - -GET my-index-000001/_search -{ - "query": { - "range": { - "_size": { <1> - "gt": 10 - } - } - }, - "aggs": { - "sizes": { - "terms": { - "field": "_size", <2> - "size": 10 - } - } - }, - "sort": [ - { - "_size": { <3> - "order": "desc" - } - } - ], - "fields": ["_size"], <4> - "script_fields": { - "size": { - "script": "doc['_size']" <5> - } - } -} --------------------------- -// TEST[continued] - -<1> Querying on the `_size` field -<2> Aggregating on the `_size` field -<3> Sorting on the `_size` field -<4> Use the `fields` parameter to return the `_size` in the search response. -<5> Uses a -{ref}/search-fields.html#script-fields[script field] -to return the `_size` field in the search response. - -[NOTE] -.Using `_size` in {kib} -================================================ - -To use the `_size` field in {kib}, update the `metaFields` setting and add -`_size` to the list of meta fields. `metaFields` can be configured in {kib} -from the Advanced Settings page in Management. - -================================================ \ No newline at end of file diff --git a/docs/plugins/mapper.asciidoc b/docs/plugins/mapper.asciidoc deleted file mode 100644 index 1502bfef83da9..0000000000000 --- a/docs/plugins/mapper.asciidoc +++ /dev/null @@ -1,31 +0,0 @@ -[[mapper]] -== Mapper plugins - -Mapper plugins allow new field data types to be added to Elasticsearch. 
- -[discrete] -=== Core mapper plugins - -The core mapper plugins are: - -<>:: - -The mapper-size plugin provides the `_size` metadata field which, when enabled, -indexes the size in bytes of the original -{ref}/mapping-source-field.html[`_source`] field. - -<>:: - -The mapper-murmur3 plugin allows hashes to be computed at index-time and stored -in the index for later use with the `cardinality` aggregation. - -<>:: - -The annotated text plugin provides the ability to index text that is a -combination of free-text and special markup that is typically used to identify -items of interest such as people or organisations (see NER or Named Entity Recognition -tools). - -include::mapper-size.asciidoc[] -include::mapper-murmur3.asciidoc[] -include::mapper-annotated-text.asciidoc[] diff --git a/docs/plugins/plugin-script.asciidoc b/docs/plugins/plugin-script.asciidoc deleted file mode 100644 index 33dc4152784c5..0000000000000 --- a/docs/plugins/plugin-script.asciidoc +++ /dev/null @@ -1,347 +0,0 @@ -[[plugin-management]] -== Plugin management - -[discrete] -=== Managing plugins on {ess} - -Refer to the {cloud}/ec-adding-plugins.html[{ess} documentation] for information -about managing plugins on {ecloud}. - -[discrete] -=== Managing plugins for self-managed deployments - -Use the `elasticsearch-plugin` command line tool to install, list, and remove plugins. It is -located in the `$ES_HOME/bin` directory by default but it may be in a -different location depending on which Elasticsearch package you installed: - -* {ref}/targz.html#targz-layout[Directory layout of `.tar.gz` archives] -* {ref}/zip-windows.html#windows-layout[Directory layout of Windows `.zip` archives] -* {ref}/deb.html#deb-layout[Directory layout of Debian package] -* {ref}/rpm.html#rpm-layout[Directory layout of RPM] - -Run the following command to get usage instructions: - -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin -h ------------------------------------ - -[IMPORTANT] -.Running as root -===================== -If Elasticsearch was installed using the deb or rpm package then run -`/usr/share/elasticsearch/bin/elasticsearch-plugin` as `root` so it can write to the appropriate files on disk. -Otherwise run `bin/elasticsearch-plugin` as the user that owns all of the Elasticsearch -files. -===================== - -[discrete] -[[plugin-management-docker]] -=== Docker - -If you run {es} using Docker, you can manage plugins using a -<>. - -[[installation]] -=== Installing plugins - -The documentation for each plugin usually includes specific installation -instructions for that plugin, but below we document the various available -options: - -[discrete] -=== Core Elasticsearch plugins - -Core Elasticsearch plugins can be installed as follows: - -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin install [plugin_name] ------------------------------------ - -For instance, to install the core <>, just run the -following command: - -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin install analysis-icu ------------------------------------ - -This command will install the version of the plugin that matches your -Elasticsearch version and also show a progress bar while downloading. 
- -[[plugin-management-custom-url]] -=== Custom URL or file system - -A plugin can also be downloaded directly from a custom location by specifying the URL: - -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin install [url] <1> ------------------------------------ -<1> must be a valid URL, the plugin name is determined from its descriptor. - --- -Unix:: -To install a plugin from your local file system at `/path/to/plugin.zip`, you could run: -+ -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin install file:///path/to/plugin.zip ------------------------------------ - -Windows:: -To install a plugin from your local file system at `C:\path\to\plugin.zip`, you could run: -+ -[source,shell] ------------------------------------ -bin\elasticsearch-plugin install file:///C:/path/to/plugin.zip ------------------------------------ -+ -NOTE: Any path that contains spaces must be wrapped in quotes! -+ -NOTE: If you are installing a plugin from the filesystem the plugin distribution -must not be contained in the `plugins` directory for the node that you are -installing the plugin to or installation will fail. - -HTTP:: -To install a plugin from an HTTP URL: -+ -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin install https://some.domain/path/to/plugin.zip ------------------------------------ -+ -The plugin script will refuse to talk to an HTTPS URL with an untrusted -certificate. To use a self-signed HTTPS cert, you will need to add the CA cert -to a local Java truststore and pass the location to the script as follows: -+ -[source,shell] ------------------------------------ -sudo CLI_JAVA_OPTS="-Djavax.net.ssl.trustStore=/path/to/trustStore.jks" bin/elasticsearch-plugin install https://host/plugin.zip ------------------------------------ --- - -[[installing-multiple-plugins]] -=== Installing multiple plugins - -Multiple plugins can be installed in one invocation as follows: - -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin install [plugin_id] [plugin_id] ... [plugin_id] ------------------------------------ - -Each `plugin_id` can be any valid form for installing a single plugin (e.g., the -name of a core plugin, or a custom URL). - -For instance, to install the core <>, run the following command: - -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin install analysis-icu ------------------------------------ - -This command will install the versions of the plugins that matches your -Elasticsearch version. The installation will be treated as a transaction, so -that all the plugins will be installed, or none of the plugins will be installed -if any installation fails. - -[[mandatory-plugins]] -=== Mandatory plugins - -If you rely on some plugins, you can define mandatory plugins by adding -`plugin.mandatory` setting to the `config/elasticsearch.yml` file, for -example: - -[source,yaml] --------------------------------------------------- -plugin.mandatory: analysis-icu,lang-js --------------------------------------------------- - -For safety reasons, a node will not start if it is missing a mandatory plugin. 
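Since a cluster that relies on a discovery plugin cannot form correctly without it, marking such a plugin as mandatory can act as a simple guard against a node starting with an incomplete installation. A minimal sketch, where the plugin name is only an example:

[source,yaml]
----
# Refuse to start this node unless the discovery-ec2 plugin is installed
plugin.mandatory: discovery-ec2
----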
- -[[listing-removing-updating]] -=== Listing, removing and updating installed plugins - -[discrete] -=== Listing plugins - -A list of the currently loaded plugins can be retrieved with the `list` option: - -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin list ------------------------------------ - -Alternatively, use the {ref}/cluster-nodes-info.html[node-info API] to find -out which plugins are installed on each node in the cluster - -[discrete] -=== Removing plugins - -Plugins can be removed manually, by deleting the appropriate directory under -`plugins/`, or using the public script: - -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin remove [pluginname] ------------------------------------ - -After a Java plugin has been removed, you will need to restart the node to -complete the removal process. - -By default, plugin configuration files (if any) are preserved on disk; this is -so that configuration is not lost while upgrading a plugin. If you wish to -purge the configuration files while removing a plugin, use `-p` or `--purge`. -This can option can be used after a plugin is removed to remove any lingering -configuration files. - -[discrete] -[[removing-multiple-plugins]] -=== Removing multiple plugins - -Multiple plugins can be removed in one invocation as follows: - -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin remove [pluginname] [pluginname] ... [pluginname] ------------------------------------ - -[discrete] -=== Updating plugins - -Except for text analysis plugins that are created using the -<>, plugins are built for a specific -version of {es}, and must be reinstalled each time {es} is updated. - -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin remove [pluginname] -sudo bin/elasticsearch-plugin install [pluginname] ------------------------------------ - -=== Other command line parameters - -The `plugin` scripts supports a number of other command line parameters: - -[discrete] -=== Silent/verbose mode - -The `--verbose` parameter outputs more debug information, while the `--silent` -parameter turns off all output including the progress bar. The script may -return the following exit codes: - -[horizontal] -`0`:: everything was OK -`64`:: unknown command or incorrect option parameter -`74`:: IO error -`70`:: any other error - -[discrete] -=== Batch mode - -Certain plugins require more privileges than those provided by default in core -Elasticsearch. These plugins will list the required privileges and ask the -user for confirmation before continuing with installation. - -When running the plugin install script from another program (e.g. install -automation scripts), the plugin script should detect that it is not being -called from the console and skip the confirmation response, automatically -granting all requested permissions. If console detection fails, then batch -mode can be forced by specifying `-b` or `--batch` as follows: - -[source,shell] ------------------------------------ -sudo bin/elasticsearch-plugin install --batch [pluginname] ------------------------------------ - -[discrete] -=== Custom config directory - -If your `elasticsearch.yml` config file is in a custom location, you will need -to specify the path to the config file when using the `plugin` script. 
You -can do this as follows: - -[source,sh] ---------------------- -sudo ES_PATH_CONF=/path/to/conf/dir bin/elasticsearch-plugin install ---------------------- - -[discrete] -=== Proxy settings - -To install a plugin via a proxy, you can add the proxy details to the -`CLI_JAVA_OPTS` environment variable with the Java settings `http.proxyHost` -and `http.proxyPort` (or `https.proxyHost` and `https.proxyPort`): - -[source,shell] ------------------------------------ -sudo CLI_JAVA_OPTS="-Dhttp.proxyHost=host_name -Dhttp.proxyPort=port_number -Dhttps.proxyHost=host_name -Dhttps.proxyPort=https_port_number" bin/elasticsearch-plugin install analysis-icu ------------------------------------ - -Or on Windows: - -[source,shell] ------------------------------------- -set CLI_JAVA_OPTS="-Dhttp.proxyHost=host_name -Dhttp.proxyPort=port_number -Dhttps.proxyHost=host_name -Dhttps.proxyPort=https_port_number" -bin\elasticsearch-plugin install analysis-icu ------------------------------------- - -=== Plugins directory - -The default location of the `plugins` directory depends on which package you install: - -* {ref}/targz.html#targz-layout[Directory layout of `.tar.gz` archives] -* {ref}/zip-windows.html#windows-layout[Directory layout of Windows `.zip` archives] -* {ref}/deb.html#deb-layout[Directory layout of Debian package] -* {ref}/rpm.html#rpm-layout[Directory layout of RPM] - - -[[manage-plugins-using-configuration-file]] -=== Manage plugins using a configuration file - -[IMPORTANT] -.Docker only -===================== -This feature is only available for https://www.docker.elastic.co/[official {es} -Docker images]. Other {es} distributions will not start with a -plugin configuration file. -===================== - -If you run {es} using Docker, you can manage plugins using a declarative configuration file. -When {es} starts up, it will compare the plugins in the file with those -that are currently installed, and add or remove plugins as required. {es} -will also upgrade official plugins when you upgrade {es} itself. - -The file is called `elasticsearch-plugins.yml`, and must be placed in the -Elasticsearch configuration directory, alongside `elasticsearch.yml`. Here -is an example: - -[source,yaml] ----- -plugins: - - id: analysis-icu - - id: repository-azure - - id: custom-mapper - location: https://example.com/archive/custom-mapper-1.0.0.zip ----- - -This example installs the official `analysis-icu` and -`repository-azure` plugins, and one unofficial plugin. Every plugin must provide -an `id`. Unofficial plugins must also provide a `location`. This is -typically a URL, but Maven coordinates are also supported. The downloaded -plugin's name must match the ID in the configuration file. - -While {es} will respect the -https://docs.oracle.com/javase/8/docs/technotes/guides/net/proxies.html[standard -Java proxy system properties] when downloading plugins, you can also configure an -HTTP proxy to use explicitly in the configuration file. For example: - -[source,yaml] ----- -plugins: - - id: custom-mapper - location: https://example.com/archive/custom-mapper-1.0.0.zip -proxy: proxy.example.com:8443 ----- diff --git a/docs/plugins/redirects.asciidoc b/docs/plugins/redirects.asciidoc deleted file mode 100644 index a97828a2e955a..0000000000000 --- a/docs/plugins/redirects.asciidoc +++ /dev/null @@ -1,161 +0,0 @@ -["appendix",role="exclude",id="redirects"] -= Deleted pages - -The following pages have moved or been deleted. 
- -[role="exclude",id="discovery-multicast"] -=== Multicast Discovery Plugin - -The `multicast-discovery` plugin has been removed. Instead, configure networking -using unicast (see {ref}/modules-network.html[Network settings]) or using -one of the <>. - -[role="exclude",id="cloud-aws"] -=== AWS Cloud Plugin - -Looking for a hosted solution for Elasticsearch on AWS? Check out https://www.elastic.co/cloud/. - -The Elasticsearch `cloud-aws` plugin has been split into two separate plugins: - -* <> (`discovery-ec2`) -* {ref}/repository-s3.html[`repository-s3`] - -[role="exclude",id="cloud-azure"] -=== Azure Cloud Plugin - -The `cloud-azure` plugin has been split into two separate plugins: - -* <> (`discovery-azure-classic`) -* {ref}/repository-azure.html[`repository-azure`] - - -[role="exclude",id="cloud-gce"] -=== GCE Cloud Plugin - -The `cloud-gce` plugin has been renamed to <> (`discovery-gce`). - -[role="exclude",id="plugins-delete-by-query"] -=== Delete-By-Query plugin removed - -The Delete-By-Query plugin has been removed in favor of a new {ref}/docs-delete-by-query.html[Delete By Query API] -implementation in core. - -[role="exclude",id="ingest-geoip"] -=== Ingest `geoip` processor plugin - -The `geoip` processor is now a module and distributed with {es} by default. See -{ref}/geoip-processor.html[GeoIP processor]. - -[role="exclude",id="ingest-user-agent"] -=== Ingest `user_agent` processor plugin - -The `user_agent` processor is now a module and distributed with {es} by default. -See {ref}/user-agent-processor.html[User Agent processor]. - -[role="exclude",id="using-ingest-geoip"] -=== Using the `geoip` processor in a pipeline - -See {ref}/geoip-processor.html#using-ingest-geoip[using `ingest-geoip`]. - -[role="exclude",id="alerting"] -=== Alerting plugins - -See {kib}'s {kibana-ref}/alerting-getting-started.html[Alerting and Actions]. - -[role="exclude",id="management"] -=== Management plugins - -See {ref}/monitor-elasticsearch-cluster.html[{stack} monitoring]. - -[role="exclude",id="security"] -=== Security plugins - -See {ref}/secure-cluster.html[{stack} security]. - -[role="exclude",id="repository-azure"] -=== Azure repository plugin - -// tag::azure-repo-migration[] -The Azure repository plugin is now included in {es}. -See {ref}/repository-azure.html[Azure repository]. -// end::azure-repo-migration[] - -[role="exclude",id="repository-azure-usage"] -=== Azure repository plugin - -include::redirects.asciidoc[tag=azure-repo-migration] - -[role="exclude",id="repository-azure-client-settings"] -=== Azure repository plugin - -include::redirects.asciidoc[tag=azure-repo-migration] - -[role="exclude",id="repository-azure-repository-settings"] -=== Azure repository plugin - -include::redirects.asciidoc[tag=azure-repo-migration] - -[role="exclude",id="repository-azure-validation"] -=== Azure repository plugin - -include::redirects.asciidoc[tag=azure-repo-migration] - -[role="exclude",id="repository-s3"] -=== S3 repository plugin - -// tag::s3-repo-migration[] -The S3 repository plugin is now included in {es}. -See {ref}/repository-s3.html[S3 repository]. 
-// end::s3-repo-migration[] - -[role="exclude",id="repository-s3-usage"] -=== S3 repository plugin - -include::redirects.asciidoc[tag=s3-repo-migration] - -[role="exclude",id="repository-s3-client"] -=== S3 repository plugin - -include::redirects.asciidoc[tag=s3-repo-migration] - -[role="exclude",id="repository-s3-repository"] -=== S3 repository plugin - -include::redirects.asciidoc[tag=s3-repo-migration] - -[role="exclude",id="repository-gcs"] -=== Google Cloud Storage repository plugin - -// tag::gcs-repo-migration[] -The Google Cloud Storage repository plugin is now included in {es}. -See {ref}/repository-gcs.html[Google Cloud Storage repository]. -// end::gcs-repo-migration[] - -[role="exclude",id="repository-gcs-usage"] -=== Google Cloud Storage repository plugin - -include::redirects.asciidoc[tag=gcs-repo-migration] - -[role="exclude",id="repository-gcs-client"] -=== Google Cloud Storage repository plugin - -include::redirects.asciidoc[tag=gcs-repo-migration] - -[role="exclude",id="repository-gcs-repository"] -=== Google Cloud Storage repository plugin - -include::redirects.asciidoc[tag=gcs-repo-migration] - -[role="exclude",id="ingest-attachment"] -=== Ingest Attachment plugin - -// tag::ingest-attachment-migration[] -The Ingest Attachment plugin is now included in {es}. -See the {ref}/attachment.html[Ingest Attachment] processor. -// end::ingest-attachment-migration[] - -[role="exclude",id="ingest"] -=== Ingest plugins - -All ingest functionality has been moved into the default distribution. -For more information refer to the {ref}/processors.html[existing processors]. diff --git a/docs/plugins/repository-hdfs.asciidoc b/docs/plugins/repository-hdfs.asciidoc deleted file mode 100644 index 0bebc3b071436..0000000000000 --- a/docs/plugins/repository-hdfs.asciidoc +++ /dev/null @@ -1,194 +0,0 @@ -[[repository-hdfs]] -=== Hadoop HDFS repository plugin - -The HDFS repository plugin adds support for using HDFS File System as a repository for -{ref}/snapshot-restore.html[Snapshot/Restore]. - -:plugin_name: repository-hdfs -include::install_remove.asciidoc[] - -[[repository-hdfs-usage]] -==== Getting started with HDFS - -The HDFS snapshot/restore plugin is built against the latest Apache Hadoop 2.x (currently 2.7.1). If the distro you are using is not protocol -compatible with Apache Hadoop, consider replacing the Hadoop libraries inside the plugin folder with your own (you might have to adjust the security permissions required). - -Even if Hadoop is already installed on the Elasticsearch nodes, for security reasons, the required libraries need to be placed under the plugin folder. Note that in most cases, if the distro is compatible, one simply needs to configure the repository with the appropriate Hadoop configuration files (see below). - -Windows Users:: -Using Apache Hadoop on Windows is problematic and thus it is not recommended. For those _really_ wanting to use it, make sure you place the elusive `winutils.exe` under the -plugin folder and point `HADOOP_HOME` variable to it; this should minimize the amount of permissions Hadoop requires (though one would still have to add some more). 
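For the Windows note above, the following is a purely illustrative sketch of pointing `HADOOP_HOME` at the location where `winutils.exe` was placed; the path is an assumption, not a recommended layout:

[source,shell]
----
REM point HADOOP_HOME at the directory where winutils.exe was placed (illustrative path)
set HADOOP_HOME=C:\path\to\directory\containing\winutils
----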
- -[[repository-hdfs-config]] -==== Configuration properties - -Once installed, define the configuration for the `hdfs` repository through the -{ref}/snapshot-restore.html[REST API]: - -[source,console] ----- -PUT _snapshot/my_hdfs_repository -{ - "type": "hdfs", - "settings": { - "uri": "hdfs://namenode:8020/", - "path": "elasticsearch/repositories/my_hdfs_repository", - "conf.dfs.client.read.shortcircuit": "true" - } -} ----- -// TEST[skip:we don't have hdfs set up while testing this] - -The following settings are supported: - -[horizontal] -`uri`:: - - The uri address for hdfs. ex: "hdfs://:/". (Required) - -`path`:: - - The file path within the filesystem where data is stored/loaded. ex: "path/to/file". (Required) - -`load_defaults`:: - - Whether to load the default Hadoop configuration or not. (Enabled by default) - -`conf.`:: - - Inlined configuration parameter to be added to Hadoop configuration. (Optional) - Only client oriented properties from the hadoop https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml[core] and https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml[hdfs] configuration files will be recognized by the plugin. - -`compress`:: - - Whether to compress the metadata or not. (Enabled by default) - -include::repository-shared-settings.asciidoc[] - -`chunk_size`:: - - Override the chunk size. (Disabled by default) - -`security.principal`:: - - Kerberos principal to use when connecting to a secured HDFS cluster. - If you are using a service principal for your elasticsearch node, you may - use the `_HOST` pattern in the principal name and the plugin will replace - the pattern with the hostname of the node at runtime (see - link:repository-hdfs-security-runtime[Creating the Secure Repository]). - -`replication_factor`:: - - The replication factor for all new HDFS files created by this repository. - Must be greater or equal to `dfs.replication.min` and less or equal to `dfs.replication.max` HDFS option. - Defaults to using HDFS cluster setting. - -[[repository-hdfs-availability]] -[discrete] -===== A note on HDFS availability -When you initialize a repository, its settings are persisted in the cluster state. When a node comes online, it will -attempt to initialize all repositories for which it has settings. If your cluster has an HDFS repository configured, then -all nodes in the cluster must be able to reach HDFS when starting. If not, then the node will fail to initialize the -repository at start up and the repository will be unusable. If this happens, you will need to remove and re-add the -repository or restart the offending node. - -[[repository-hdfs-security]] -==== Hadoop security - -The HDFS repository plugin integrates seamlessly with Hadoop's authentication model. The following authentication -methods are supported by the plugin: - -[horizontal] -`simple`:: - - Also means "no security" and is enabled by default. Uses information from underlying operating system account - running Elasticsearch to inform Hadoop of the name of the current user. Hadoop makes no attempts to verify this - information. - -`kerberos`:: - - Authenticates to Hadoop through the usage of a Kerberos principal and keytab. 
Interfacing with HDFS clusters - secured with Kerberos requires a few additional steps to enable (See <> and - <> for more info) - -[[repository-hdfs-security-keytabs]] -[discrete] -===== Principals and keytabs -Before attempting to connect to a secured HDFS cluster, provision the Kerberos principals and keytabs that the -Elasticsearch nodes will use for authenticating to Kerberos. For maximum security and to avoid tripping up the Kerberos -replay protection, you should create a service principal per node, following the pattern of -`elasticsearch/hostname@REALM`. - -WARNING: In some cases, if the same principal is authenticating from multiple clients at once, services may reject -authentication for those principals under the assumption that they could be replay attacks. If you are running the -plugin in production with multiple nodes you should be using a unique service principal for each node. - -On each Elasticsearch node, place the appropriate keytab file in the node's configuration location under the -`repository-hdfs` directory using the name `krb5.keytab`: - -[source, bash] ----- -$> cd elasticsearch/config -$> ls -elasticsearch.yml jvm.options log4j2.properties repository-hdfs/ scripts/ -$> cd repository-hdfs -$> ls -krb5.keytab ----- -// TEST[skip:this is for demonstration purposes only - -NOTE: Make sure you have the correct keytabs! If you are using a service principal per node (like -`elasticsearch/hostname@REALM`) then each node will need its own unique keytab file for the principal assigned to that -host! - -// Setup at runtime (principal name) -[[repository-hdfs-security-runtime]] -[discrete] -===== Creating the secure repository -Once your keytab files are in place and your cluster is started, creating a secured HDFS repository is simple. Just -add the name of the principal that you will be authenticating as in the repository settings under the -`security.principal` option: - -[source,console] ----- -PUT _snapshot/my_hdfs_repository -{ - "type": "hdfs", - "settings": { - "uri": "hdfs://namenode:8020/", - "path": "/user/elasticsearch/repositories/my_hdfs_repository", - "security.principal": "elasticsearch@REALM" - } -} ----- -// TEST[skip:we don't have hdfs set up while testing this] - -If you are using different service principals for each node, you can use the `_HOST` pattern in your principal -name. Elasticsearch will automatically replace the pattern with the hostname of the node at runtime: - -[source,console] ----- -PUT _snapshot/my_hdfs_repository -{ - "type": "hdfs", - "settings": { - "uri": "hdfs://namenode:8020/", - "path": "/user/elasticsearch/repositories/my_hdfs_repository", - "security.principal": "elasticsearch/_HOST@REALM" - } -} ----- -// TEST[skip:we don't have hdfs set up while testing this] - -[[repository-hdfs-security-authorization]] -[discrete] -===== Authorization -Once Elasticsearch is connected and authenticated to HDFS, HDFS will infer a username to use for -authorizing file access for the client. By default, it picks this username from the primary part of -the kerberos principal used to authenticate to the service. For example, in the case of a principal -like `elasticsearch@REALM` or `elasticsearch/hostname@REALM` then the username that HDFS -extracts for file access checks will be `elasticsearch`. - -NOTE: The repository plugin makes no assumptions of what Elasticsearch's principal name is. The main fragment of the -Kerberos principal is not required to be `elasticsearch`. 
If you have a principal or service name that works better -for you or your organization then feel free to use it instead! diff --git a/docs/plugins/repository-shared-settings.asciidoc b/docs/plugins/repository-shared-settings.asciidoc deleted file mode 100644 index e09f859c1c12f..0000000000000 --- a/docs/plugins/repository-shared-settings.asciidoc +++ /dev/null @@ -1,15 +0,0 @@ -`max_restore_bytes_per_sec`:: - - Throttles per node restore rate. Defaults to unlimited. - Note that restores are also throttled through {ref}/recovery.html[recovery settings]. - -`max_snapshot_bytes_per_sec`:: - - Throttles per node snapshot rate. Defaults to `40mb` per second. - Note that if the {ref}/recovery.html[recovery settings for managed services] - are set, then it defaults to unlimited, and the rate is additionally - throttled through {ref}/recovery.html[recovery settings]. - -`readonly`:: - - Makes repository read-only. Defaults to `false`. diff --git a/docs/plugins/repository.asciidoc b/docs/plugins/repository.asciidoc deleted file mode 100644 index fc0c5b3c411d4..0000000000000 --- a/docs/plugins/repository.asciidoc +++ /dev/null @@ -1,27 +0,0 @@ -[[repository]] -== Snapshot/restore repository plugins - -Repository plugins extend the {ref}/snapshot-restore.html[Snapshot/Restore] -functionality in Elasticsearch by adding repositories backed by the cloud or -by distributed file systems: - -[discrete] -==== Official repository plugins - -NOTE: Support for S3, GCS and Azure repositories is now bundled in {es} by -default. - -The official repository plugins are: - -<>:: - -The Hadoop HDFS Repository plugin adds support for using HDFS as a repository. - -[discrete] -=== Community contributed repository plugins - -The following plugin has been contributed by our community: - -* https://github.com/BigDataBoutique/elasticsearch-repository-swift[Openstack Swift] (by Wikimedia Foundation and BigData Boutique) - -include::repository-hdfs.asciidoc[] diff --git a/docs/plugins/store-smb.asciidoc b/docs/plugins/store-smb.asciidoc deleted file mode 100644 index da803b4f42022..0000000000000 --- a/docs/plugins/store-smb.asciidoc +++ /dev/null @@ -1,59 +0,0 @@ -[[store-smb]] -=== Store SMB plugin - -The Store SMB plugin works around for a bug in Windows SMB and Java on windows. - -:plugin_name: store-smb -include::install_remove.asciidoc[] - -[[store-smb-usage]] -==== Working around a bug in Windows SMB and Java on windows - -When using a shared file system based on the SMB protocol (like Azure File Service) to store indices, the way Lucene -opens index segment files is with a write only flag. This is the _correct_ way to open the files, as they will only be -used for writes and allows different FS implementations to optimize for it. Sadly, in windows with SMB, this disables -the cache manager, causing writes to be slow. This has been described in -https://issues.apache.org/jira/browse/LUCENE-6176[LUCENE-6176], but it affects each and every Java program out there!. -This need and must be fixed outside of ES and/or Lucene, either in windows or OpenJDK. For now, we are providing an -experimental support to open the files with read flag, but this should be considered experimental and the correct way -to fix it is in OpenJDK or Windows. 
- -The Store SMB plugin provides two storage types optimized for SMB: - -`smb_mmap_fs`:: - - a SMB specific implementation of the default - {ref}/index-modules-store.html#mmapfs[mmap fs] - -`smb_simple_fs`:: - - deprecated::[7.15,"smb_simple_fs is deprecated and will be removed in 8.0. Use smb_nio_fs or other file systems instead."] - -`smb_nio_fs`:: - - a SMB specific implementation of the default - {ref}/index-modules-store.html#niofs[nio fs] - -To use one of these specific storage types, you need to install the Store SMB plugin and restart the node. -Then configure Elasticsearch to set the storage type you want. - -This can be configured for all indices by adding this to the `elasticsearch.yml` file: - -[source,yaml] ----- -index.store.type: smb_nio_fs ----- - -Note that settings will be applied for newly created indices. - -It can also be set on a per-index basis at index creation time: - -[source,console] ----- -PUT my-index-000001 -{ - "settings": { - "index.store.type": "smb_mmap_fs" - } -} ----- diff --git a/docs/plugins/store.asciidoc b/docs/plugins/store.asciidoc deleted file mode 100644 index aa19cf6d8f36c..0000000000000 --- a/docs/plugins/store.asciidoc +++ /dev/null @@ -1,17 +0,0 @@ -[[store]] -== Store plugins - -Store plugins offer alternatives to default Lucene stores. - -[discrete] -=== Core store plugins - -The core store plugins are: - -<>:: - -The Store SMB plugin works around for a bug in Windows SMB and Java on windows. - - -include::store-smb.asciidoc[] - diff --git a/docs/reference/aggregations.asciidoc b/docs/reference/aggregations.asciidoc deleted file mode 100644 index 26b6a53da2bba..0000000000000 --- a/docs/reference/aggregations.asciidoc +++ /dev/null @@ -1,421 +0,0 @@ -[[search-aggregations]] -= Aggregations - -[partintro] --- -An aggregation summarizes your data as metrics, statistics, or other analytics. -Aggregations help you answer questions like: - -* What's the average load time for my website? -* Who are my most valuable customers based on transaction volume? -* What would be considered a large file on my network? -* How many products are in each product category? - -{es} organizes aggregations into three categories: - -* <> aggregations that calculate metrics, -such as a sum or average, from field values. - -* <> aggregations that -group documents into buckets, also called bins, based on field values, ranges, -or other criteria. - -* <> aggregations that take input from -other aggregations instead of documents or fields. - -[discrete] -[[run-an-agg]] -=== Run an aggregation - -You can run aggregations as part of a <> by specifying the <>'s `aggs` parameter. The -following search runs a -<> on -`my-field`: - -[source,console] ----- -GET /my-index-000001/_search -{ - "aggs": { - "my-agg-name": { - "terms": { - "field": "my-field" - } - } - } -} ----- -// TEST[setup:my_index] -// TEST[s/my-field/http.request.method/] - -Aggregation results are in the response's `aggregations` object: - -[source,console-result] ----- -{ - "took": 78, - "timed_out": false, - "_shards": { - "total": 1, - "successful": 1, - "skipped": 0, - "failed": 0 - }, - "hits": { - "total": { - "value": 5, - "relation": "eq" - }, - "max_score": 1.0, - "hits": [...] 
- }, - "aggregations": { - "my-agg-name": { <1> - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [] - } - } -} ----- -// TESTRESPONSE[s/"took": 78/"took": "$body.took"/] -// TESTRESPONSE[s/\.\.\.$/"took": "$body.took", "timed_out": false, "_shards": "$body._shards", /] -// TESTRESPONSE[s/"hits": \[\.\.\.\]/"hits": "$body.hits.hits"/] -// TESTRESPONSE[s/"buckets": \[\]/"buckets":\[\{"key":"get","doc_count":5\}\]/] - -<1> Results for the `my-agg-name` aggregation. - -[discrete] -[[change-agg-scope]] -=== Change an aggregation's scope - -Use the `query` parameter to limit the documents on which an aggregation runs: - -[source,console] ----- -GET /my-index-000001/_search -{ - "query": { - "range": { - "@timestamp": { - "gte": "now-1d/d", - "lt": "now/d" - } - } - }, - "aggs": { - "my-agg-name": { - "terms": { - "field": "my-field" - } - } - } -} ----- -// TEST[setup:my_index] -// TEST[s/my-field/http.request.method/] - -[discrete] -[[return-only-agg-results]] -=== Return only aggregation results - -By default, searches containing an aggregation return both search hits and -aggregation results. To return only aggregation results, set `size` to `0`: - -[source,console] ----- -GET /my-index-000001/_search -{ - "size": 0, - "aggs": { - "my-agg-name": { - "terms": { - "field": "my-field" - } - } - } -} ----- -// TEST[setup:my_index] -// TEST[s/my-field/http.request.method/] - -[discrete] -[[run-multiple-aggs]] -=== Run multiple aggregations - -You can specify multiple aggregations in the same request: - -[source,console] ----- -GET /my-index-000001/_search -{ - "aggs": { - "my-first-agg-name": { - "terms": { - "field": "my-field" - } - }, - "my-second-agg-name": { - "avg": { - "field": "my-other-field" - } - } - } -} ----- -// TEST[setup:my_index] -// TEST[s/my-field/http.request.method/] -// TEST[s/my-other-field/http.response.bytes/] - -[discrete] -[[run-sub-aggs]] -=== Run sub-aggregations - -Bucket aggregations support bucket or metric sub-aggregations. For example, a -terms aggregation with an <> -sub-aggregation calculates an average value for each bucket of documents. There -is no level or depth limit for nesting sub-aggregations. - -[source,console] ----- -GET /my-index-000001/_search -{ - "aggs": { - "my-agg-name": { - "terms": { - "field": "my-field" - }, - "aggs": { - "my-sub-agg-name": { - "avg": { - "field": "my-other-field" - } - } - } - } - } -} ----- -// TEST[setup:my_index] -// TEST[s/_search/_search?size=0/] -// TEST[s/my-field/http.request.method/] -// TEST[s/my-other-field/http.response.bytes/] - -The response nests sub-aggregation results under their parent aggregation: - -[source,console-result] ----- -{ - ... - "aggregations": { - "my-agg-name": { <1> - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "foo", - "doc_count": 5, - "my-sub-agg-name": { <2> - "value": 75.0 - } - } - ] - } - } -} ----- -// TESTRESPONSE[s/\.\.\./"took": "$body.took", "timed_out": false, "_shards": "$body._shards", "hits": "$body.hits",/] -// TESTRESPONSE[s/"key": "foo"/"key": "get"/] -// TESTRESPONSE[s/"value": 75.0/"value": $body.aggregations.my-agg-name.buckets.0.my-sub-agg-name.value/] - -<1> Results for the parent aggregation, `my-agg-name`. -<2> Results for `my-agg-name`'s sub-aggregation, `my-sub-agg-name`. 
- -[discrete] -[[add-metadata-to-an-agg]] -=== Add custom metadata - -Use the `meta` object to associate custom metadata with an aggregation: - -[source,console] ----- -GET /my-index-000001/_search -{ - "aggs": { - "my-agg-name": { - "terms": { - "field": "my-field" - }, - "meta": { - "my-metadata-field": "foo" - } - } - } -} ----- -// TEST[setup:my_index] -// TEST[s/_search/_search?size=0/] - -The response returns the `meta` object in place: - -[source,console-result] ----- -{ - ... - "aggregations": { - "my-agg-name": { - "meta": { - "my-metadata-field": "foo" - }, - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [] - } - } -} ----- -// TESTRESPONSE[s/\.\.\./"took": "$body.took", "timed_out": false, "_shards": "$body._shards", "hits": "$body.hits",/] - -[discrete] -[[return-agg-type]] -=== Return the aggregation type - -By default, aggregation results include the aggregation's name but not its type. -To return the aggregation type, use the `typed_keys` query parameter. - -[source,console] ----- -GET /my-index-000001/_search?typed_keys -{ - "aggs": { - "my-agg-name": { - "histogram": { - "field": "my-field", - "interval": 1000 - } - } - } -} ----- -// TEST[setup:my_index] -// TEST[s/typed_keys/typed_keys&size=0/] -// TEST[s/my-field/http.response.bytes/] - -The response returns the aggregation type as a prefix to the aggregation's name. - -IMPORTANT: Some aggregations return a different aggregation type from the -type in the request. For example, the terms, -<>, -and <> -aggregations return different aggregations types depending on the data type of -the aggregated field. - -[source,console-result] ----- -{ - ... - "aggregations": { - "histogram#my-agg-name": { <1> - "buckets": [] - } - } -} ----- -// TESTRESPONSE[s/\.\.\./"took": "$body.took", "timed_out": false, "_shards": "$body._shards", "hits": "$body.hits",/] -// TESTRESPONSE[s/"buckets": \[\]/"buckets":\[\{"key":1070000.0,"doc_count":5\}\]/] - -<1> The aggregation type, `histogram`, followed by a `#` separator and the aggregation's name, `my-agg-name`. - -[discrete] -[[use-scripts-in-an-agg]] -=== Use scripts in an aggregation - -When a field doesn't exactly match the aggregation you need, you -should aggregate on a <>: - -[source,console] ----- -GET /my-index-000001/_search?size=0 -{ - "runtime_mappings": { - "message.length": { - "type": "long", - "script": "emit(doc['message.keyword'].value.length())" - } - }, - "aggs": { - "message_length": { - "histogram": { - "interval": 10, - "field": "message.length" - } - } - } -} ----- -// TEST[setup:my_index] - -//// -[source,console-result] ----- -{ - "timed_out": false, - "took": "$body.took", - "_shards": { - "total": 1, - "successful": 1, - "failed": 0, - "skipped": 0 - }, - "hits": "$body.hits", - "aggregations": { - "message_length": { - "buckets": [ - { - "key": 30.0, - "doc_count": 5 - } - ] - } - } -} ----- -//// - -Scripts calculate field values dynamically, which adds a little -overhead to the aggregation. In addition to the time spent calculating, -some aggregations like <> -and <> can't use -some of their optimizations with runtime fields. In total, performance costs -for using a runtime field varies from aggregation to aggregation. - -// TODO when we have calculated fields we can link to them here. - -[discrete] -[[agg-caches]] -=== Aggregation caches - -For faster responses, {es} caches the results of frequently run aggregations in -the <>. To get cached results, use the -same <> for each search. 
If you -don't need search hits, <> to avoid -filling the cache. - -{es} routes searches with the same preference string to the same shards. If the -shards' data doesn’t change between searches, the shards return cached -aggregation results. - -[discrete] -[[limits-for-long-values]] -=== Limits for `long` values - -When running aggregations, {es} uses <> values to hold and -represent numeric data. As a result, aggregations on <> numbers -greater than +2^53^+ are approximate. --- - -include::aggregations/bucket.asciidoc[] - -include::aggregations/metrics.asciidoc[] - -include::aggregations/pipeline.asciidoc[] diff --git a/docs/reference/aggregations/bucket.asciidoc b/docs/reference/aggregations/bucket.asciidoc deleted file mode 100644 index 215bd57ae5476..0000000000000 --- a/docs/reference/aggregations/bucket.asciidoc +++ /dev/null @@ -1,86 +0,0 @@ -[[search-aggregations-bucket]] -== Bucket aggregations - -Bucket aggregations don't calculate metrics over fields like the metrics aggregations do, but instead, they create -buckets of documents. Each bucket is associated with a criterion (depending on the aggregation type) which determines -whether or not a document in the current context "falls" into it. In other words, the buckets effectively define document -sets. In addition to the buckets themselves, the `bucket` aggregations also compute and return the number of documents -that "fell into" each bucket. - -Bucket aggregations, as opposed to `metrics` aggregations, can hold sub-aggregations. These sub-aggregations will be -aggregated for the buckets created by their "parent" bucket aggregation. - -There are different bucket aggregators, each with a different "bucketing" strategy. Some define a single bucket, some -define fixed number of multiple buckets, and others dynamically create the buckets during the aggregation process. - -NOTE: The <> cluster setting -limits the number of buckets allowed in a single response. 
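As a minimal sketch of a bucket aggregation holding a metric sub-aggregation, the following request creates a single `filter` bucket and averages a field within it. The index, field names, and the `foo` value are placeholders carried over from the earlier examples:

[source,console]
----
GET /my-index-000001/_search?size=0
{
  "aggs": {
    "my-bucket": {
      "filter": { "term": { "my-field": "foo" } },
      "aggs": {
        "my-metric": {
          "avg": { "field": "my-other-field" }
        }
      }
    }
  }
}
----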
- -include::bucket/adjacency-matrix-aggregation.asciidoc[] - -include::bucket/autodatehistogram-aggregation.asciidoc[] - -include::bucket/categorize-text-aggregation.asciidoc[] - -include::bucket/children-aggregation.asciidoc[] - -include::bucket/composite-aggregation.asciidoc[] - -include::bucket/datehistogram-aggregation.asciidoc[] - -include::bucket/daterange-aggregation.asciidoc[] - -include::bucket/diversified-sampler-aggregation.asciidoc[] - -include::bucket/filter-aggregation.asciidoc[] - -include::bucket/filters-aggregation.asciidoc[] - -include::bucket/frequent-item-sets-aggregation.asciidoc[] - -include::bucket/geodistance-aggregation.asciidoc[] - -include::bucket/geohashgrid-aggregation.asciidoc[] - -include::bucket/geohexgrid-aggregation.asciidoc[] - -include::bucket/geotilegrid-aggregation.asciidoc[] - -include::bucket/global-aggregation.asciidoc[] - -include::bucket/histogram-aggregation.asciidoc[] - -include::bucket/ipprefix-aggregation.asciidoc[] - -include::bucket/iprange-aggregation.asciidoc[] - -include::bucket/missing-aggregation.asciidoc[] - -include::bucket/multi-terms-aggregation.asciidoc[] - -include::bucket/nested-aggregation.asciidoc[] - -include::bucket/parent-aggregation.asciidoc[] - -include::bucket/random-sampler-aggregation.asciidoc[] - -include::bucket/range-aggregation.asciidoc[] - -include::bucket/rare-terms-aggregation.asciidoc[] - -include::bucket/reverse-nested-aggregation.asciidoc[] - -include::bucket/sampler-aggregation.asciidoc[] - -include::bucket/significantterms-aggregation.asciidoc[] - -include::bucket/significanttext-aggregation.asciidoc[] - -include::bucket/terms-aggregation.asciidoc[] - -// PREVIEW -include::bucket/time-series-aggregation.asciidoc[] - -include::bucket/variablewidthhistogram-aggregation.asciidoc[] - -include::bucket/range-field-note.asciidoc[] diff --git a/docs/reference/aggregations/bucket/adjacency-matrix-aggregation.asciidoc b/docs/reference/aggregations/bucket/adjacency-matrix-aggregation.asciidoc deleted file mode 100644 index 45750751d5987..0000000000000 --- a/docs/reference/aggregations/bucket/adjacency-matrix-aggregation.asciidoc +++ /dev/null @@ -1,152 +0,0 @@ -[[search-aggregations-bucket-adjacency-matrix-aggregation]] -=== Adjacency matrix aggregation -++++ -Adjacency matrix -++++ - -A bucket aggregation returning a form of {wikipedia}/Adjacency_matrix[adjacency matrix]. -The request provides a collection of named filter expressions, similar to the `filters` aggregation -request. -Each bucket in the response represents a non-empty cell in the matrix of intersecting filters. - -Given filters named `A`, `B` and `C` the response would return buckets with the following names: - - -[options="header"] -|======================= -| h|A h|B h|C -h|A |A |A&B |A&C -h|B | |B |B&C -h|C | | |C -|======================= - -The intersecting buckets e.g `A&C` are labelled using a combination of the two filter names with a default separator -of `&`. Note that the response does not also include a `C&A` bucket as this would be the -same set of documents as `A&C`. The matrix is said to be _symmetric_ so we only return half of it. To do this we sort -the filter name strings and always use the lowest of a pair as the value to the left of the separator. - - -[[adjacency-matrix-agg-ex]] -==== Example - -The following `interactions` aggregation uses `adjacency_matrix` to determine -which groups of individuals exchanged emails. 
- -[source,console,id=adjacency-matrix-aggregation-example] --------------------------------------------------- -PUT emails/_bulk?refresh -{ "index" : { "_id" : 1 } } -{ "accounts" : ["hillary", "sidney"]} -{ "index" : { "_id" : 2 } } -{ "accounts" : ["hillary", "donald"]} -{ "index" : { "_id" : 3 } } -{ "accounts" : ["vladimir", "donald"]} - -GET emails/_search -{ - "size": 0, - "aggs" : { - "interactions" : { - "adjacency_matrix" : { - "filters" : { - "grpA" : { "terms" : { "accounts" : ["hillary", "sidney"] }}, - "grpB" : { "terms" : { "accounts" : ["donald", "mitt"] }}, - "grpC" : { "terms" : { "accounts" : ["vladimir", "nigel"] }} - } - } - } - } -} --------------------------------------------------- - -The response contains buckets with document counts for each filter and -combination of filters. Buckets with no matching documents are excluded from the -response. - -[source,console-result] --------------------------------------------------- -{ - "took": 9, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "interactions": { - "buckets": [ - { - "key":"grpA", - "doc_count": 2 - }, - { - "key":"grpA&grpB", - "doc_count": 1 - }, - { - "key":"grpB", - "doc_count": 2 - }, - { - "key":"grpB&grpC", - "doc_count": 1 - }, - { - "key":"grpC", - "doc_count": 1 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 9/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -[role="child_attributes"] -[[adjacency-matrix-agg-params]] -==== Parameters - -`filters`:: -(Required, object) -Filters used to create buckets. -+ -.Properties of `filters` -[%collapsible%open] -==== -``:: -(Required, <>) -Query used to filter documents. The key is the filter name. -+ -At least one filter is required. The total number of filters cannot exceed the -<> -setting. See <>. -==== - -`separator`:: -(Optional, string) -Separator used to concatenate filter names. Defaults to `&`. - -[[adjacency-matrix-agg-response]] -==== Response body - -`key`:: -(string) -Filters for the bucket. If the bucket uses multiple filters, filter names are -concatenated using a `separator`. - -`doc_count`:: -(integer) -Number of documents matching the bucket's filters. - -[[adjacency-matrix-agg-usage]] -==== Usage -On its own this aggregation can provide all of the data required to create an undirected weighted graph. -However, when used with child aggregations such as a `date_histogram` the results can provide the -additional levels of data required to perform {wikipedia}/Dynamic_network_analysis[dynamic network analysis] -where examining interactions _over time_ becomes important. - -[[adjacency-matrix-agg-filter-limits]] -==== Filter limits -For N filters the matrix of buckets produced can be N²/2 which can be costly. -The circuit breaker settings prevent results producing too many buckets and to avoid excessive disk seeks -the `indices.query.bool.max_clause_count` setting is used to limit the number of filters. 
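To sketch the dynamic network analysis usage mentioned above, the `interactions` aggregation can carry a `date_histogram` child aggregation. This assumes the `emails` documents also contain a date field, called `sent` here purely for illustration; the sample documents above do not include one:

[source,console]
----
GET emails/_search
{
  "size": 0,
  "aggs": {
    "interactions": {
      "adjacency_matrix": {
        "filters": {
          "grpA": { "terms": { "accounts": ["hillary", "sidney"] }},
          "grpB": { "terms": { "accounts": ["donald", "mitt"] }},
          "grpC": { "terms": { "accounts": ["vladimir", "nigel"] }}
        }
      },
      "aggs": {
        "over_time": {
          "date_histogram": {
            "field": "sent",
            "calendar_interval": "month"
          }
        }
      }
    }
  }
}
----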
diff --git a/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc deleted file mode 100644 index c1356145b4269..0000000000000 --- a/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc +++ /dev/null @@ -1,310 +0,0 @@ -[[search-aggregations-bucket-autodatehistogram-aggregation]] -=== Auto-interval date histogram aggregation -++++ -Auto-interval date histogram -++++ - -A multi-bucket aggregation similar to the <> except -instead of providing an interval to use as the width of each bucket, a target number of buckets is provided -indicating the number of buckets needed and the interval of the buckets is automatically chosen to best achieve -that target. The number of buckets returned will always be less than or equal to this target number. - -The buckets field is optional, and will default to 10 buckets if not specified. - -Requesting a target of 10 buckets. - -[source,console,id=autodatehistogram-aggregation-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "sales_over_time": { - "auto_date_histogram": { - "field": "date", - "buckets": 10 - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -==== Keys - -Internally, a date is represented as a 64 bit number representing a timestamp -in milliseconds-since-the-epoch. These timestamps are returned as the bucket -++key++s. The `key_as_string` is the same timestamp converted to a formatted -date string using the format specified with the `format` parameter: - -TIP: If no `format` is specified, then it will use the first date -<> specified in the field mapping. - -[source,console,id=autodatehistogram-aggregation-format-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "sales_over_time": { - "auto_date_histogram": { - "field": "date", - "buckets": 5, - "format": "yyyy-MM-dd" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> Supports expressive date <> - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "sales_over_time": { - "buckets": [ - { - "key_as_string": "2015-01-01", - "key": 1420070400000, - "doc_count": 3 - }, - { - "key_as_string": "2015-02-01", - "key": 1422748800000, - "doc_count": 2 - }, - { - "key_as_string": "2015-03-01", - "key": 1425168000000, - "doc_count": 2 - } - ], - "interval": "1M" - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -==== Intervals - -The interval of the returned buckets is selected based on the data collected by the -aggregation so that the number of buckets returned is less than or equal to the number -requested. The possible intervals returned are: - -[horizontal] -seconds:: In multiples of 1, 5, 10 and 30 -minutes:: In multiples of 1, 5, 10 and 30 -hours:: In multiples of 1, 3 and 12 -days:: In multiples of 1, and 7 -months:: In multiples of 1, and 3 -years:: In multiples of 1, 5, 10, 20, 50 and 100 - -In the worst case, where the number of daily buckets are too many for the requested -number of buckets, the number of buckets returned will be 1/7th of the number of -buckets requested. - -==== Time Zone - -Date-times are stored in Elasticsearch in UTC. 
By default, all bucketing and -rounding is also done in UTC. The `time_zone` parameter can be used to indicate -that bucketing should use a different time zone. - -Time zones may either be specified as an ISO 8601 UTC offset (e.g. `+01:00` or -`-08:00`) or as a timezone id, an identifier used in the TZ database like -`America/Los_Angeles`. - -Consider the following example: - -[source,console,id=autodatehistogram-aggregation-timezone-example] ---------------------------------- -PUT my-index-000001/_doc/1?refresh -{ - "date": "2015-10-01T00:30:00Z" -} - -PUT my-index-000001/_doc/2?refresh -{ - "date": "2015-10-01T01:30:00Z" -} - -PUT my-index-000001/_doc/3?refresh -{ - "date": "2015-10-01T02:30:00Z" -} - -GET my-index-000001/_search?size=0 -{ - "aggs": { - "by_day": { - "auto_date_histogram": { - "field": "date", - "buckets" : 3 - } - } - } -} ---------------------------------- - -UTC is used if no time zone is specified, three 1-hour buckets are returned -starting at midnight UTC on 1 October 2015: - -[source,console-result] ---------------------------------- -{ - ... - "aggregations": { - "by_day": { - "buckets": [ - { - "key_as_string": "2015-10-01T00:00:00.000Z", - "key": 1443657600000, - "doc_count": 1 - }, - { - "key_as_string": "2015-10-01T01:00:00.000Z", - "key": 1443661200000, - "doc_count": 1 - }, - { - "key_as_string": "2015-10-01T02:00:00.000Z", - "key": 1443664800000, - "doc_count": 1 - } - ], - "interval": "1h" - } - } -} ---------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -If a `time_zone` of `-01:00` is specified, then midnight starts at one hour before -midnight UTC: - -[source,console] ---------------------------------- -GET my-index-000001/_search?size=0 -{ - "aggs": { - "by_day": { - "auto_date_histogram": { - "field": "date", - "buckets" : 3, - "time_zone": "-01:00" - } - } - } -} ---------------------------------- -// TEST[continued] - - -Now three 1-hour buckets are still returned but the first bucket starts at -11:00pm on 30 September 2015 since that is the local time for the bucket in -the specified time zone. - -[source,console-result] ---------------------------------- -{ - ... - "aggregations": { - "by_day": { - "buckets": [ - { - "key_as_string": "2015-09-30T23:00:00.000-01:00", <1> - "key": 1443657600000, - "doc_count": 1 - }, - { - "key_as_string": "2015-10-01T00:00:00.000-01:00", - "key": 1443661200000, - "doc_count": 1 - }, - { - "key_as_string": "2015-10-01T01:00:00.000-01:00", - "key": 1443664800000, - "doc_count": 1 - } - ], - "interval": "1h" - } - } -} ---------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -<1> The `key_as_string` value represents midnight on each day - in the specified time zone. - -WARNING: When using time zones that follow DST (daylight savings time) changes, -buckets close to the moment when those changes happen can have slightly different -sizes than neighbouring buckets. -For example, consider a DST start in the `CET` time zone: on 27 March 2016 at 2am, -clocks were turned forward 1 hour to 3am local time. If the result of the aggregation -was daily buckets, the bucket covering that day will only hold data for 23 hours -instead of the usual 24 hours for other buckets. The same is true for shorter intervals -like e.g. 12h. Here, we will have only a 11h bucket on the morning of 27 March when the -DST shift happens. 
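The examples above use a fixed UTC offset, but a TZ database identifier such as `America/Los_Angeles` (mentioned at the start of this section) works the same way and keeps the bucketing aware of DST transitions as described in the warning. A minimal sketch against the same index:

[source,console]
----
GET my-index-000001/_search?size=0
{
  "aggs": {
    "by_day": {
      "auto_date_histogram": {
        "field": "date",
        "buckets": 3,
        "time_zone": "America/Los_Angeles"
      }
    }
  }
}
----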
- -==== Minimum Interval parameter - -The `minimum_interval` allows the caller to specify the minimum rounding interval that should be used. -This can make the collection process more efficient, as the aggregation will not attempt to round at -any interval lower than `minimum_interval`. - -The accepted units for `minimum_interval` are: - -* year -* month -* day -* hour -* minute -* second - -[source,console,id=autodatehistogram-aggregation-minimum-interval-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "sale_date": { - "auto_date_histogram": { - "field": "date", - "buckets": 10, - "minimum_interval": "minute" - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. -By default they will be ignored but it is also possible to treat them as if they -had a value. - -[source,console,id=autodatehistogram-aggregation-missing-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "sale_date": { - "auto_date_histogram": { - "field": "date", - "buckets": 10, - "missing": "2000/01/01" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> Documents without a value in the `publish_date` field will fall into the same bucket as documents that have the value `2000-01-01`. - diff --git a/docs/reference/aggregations/bucket/categorize-text-aggregation.asciidoc b/docs/reference/aggregations/bucket/categorize-text-aggregation.asciidoc deleted file mode 100644 index 399e6ed87581e..0000000000000 --- a/docs/reference/aggregations/bucket/categorize-text-aggregation.asciidoc +++ /dev/null @@ -1,534 +0,0 @@ -[[search-aggregations-bucket-categorize-text-aggregation]] -=== Categorize text aggregation -++++ -Categorize text -++++ - -A multi-bucket aggregation that groups semi-structured text into buckets. Each `text` field is re-analyzed -using a custom analyzer. The resulting tokens are then categorized creating buckets of similarly formatted -text values. This aggregation works best with machine generated text like system logs. Only the first 100 analyzed -tokens are used to categorize the text. - -NOTE: If you have considerable memory allocated to your JVM but are receiving circuit breaker exceptions from this - aggregation, you may be attempting to categorize text that is poorly formatted for categorization. Consider - adding `categorization_filters` or running under <>, - <>, or - <> to explore the created categories. - -NOTE: The algorithm used for categorization was completely changed in version 8.3.0. As a result this aggregation - will not work in a mixed version cluster where some nodes are on version 8.3.0 or higher and others are - on a version older than 8.3.0. Upgrade all nodes in your cluster to the same version if you experience - an error related to this change. - -[[bucket-categorize-text-agg-syntax]] -==== Parameters - -`categorization_analyzer`:: -(Optional, object or string) -The categorization analyzer specifies how the text is analyzed and tokenized before -being categorized. The syntax is very similar to that used to define the `analyzer` in the -<>. This -property cannot be used at the same time as `categorization_filters`. -+ -The `categorization_analyzer` field can be specified either as a string or as an -object. If it is a string it must refer to a -<> or one added by another plugin. 
If it -is an object it has the following properties: -+ -.Properties of `categorization_analyzer` -[%collapsible%open] -===== -`char_filter`:::: -(array of strings or objects) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=char-filter] - -`tokenizer`:::: -(string or object) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=tokenizer] - -`filter`:::: -(array of strings or objects) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=filter] -===== - -`categorization_filters`:: -(Optional, array of strings) -This property expects an array of regular expressions. The expressions -are used to filter out matching sequences from the categorization field values. -You can use this functionality to fine tune the categorization by excluding -sequences from consideration when categories are defined. For example, you can -exclude SQL statements that appear in your log files. This -property cannot be used at the same time as `categorization_analyzer`. If you -only want to define simple regular expression filters that are applied prior to -tokenization, setting this property is the easiest method. If you also want to -customize the tokenizer or post-tokenization filtering, use the -`categorization_analyzer` property instead and include the filters as -`pattern_replace` character filters. - -`field`:: -(Required, string) -The semi-structured text field to categorize. - -`max_matched_tokens`:: -(Optional, integer) -This parameter does nothing now, but is permitted for compatibility with the original -pre-8.3.0 implementation. - -`max_unique_tokens`:: -(Optional, integer) -This parameter does nothing now, but is permitted for compatibility with the original -pre-8.3.0 implementation. - -`min_doc_count`:: -(Optional, integer) -The minimum number of documents for a bucket to be returned to the results. - -`shard_min_doc_count`:: -(Optional, integer) -The minimum number of documents for a bucket to be returned from the shard before -merging. - -`shard_size`:: -(Optional, integer) -The number of categorization buckets to return from each shard before merging -all the results. - -`similarity_threshold`:: -(Optional, integer, default: `70`) -The minimum percentage of token weight that must match for text to be added to the -category bucket. -Must be between 1 and 100. The larger the value the narrower the categories. -Larger values will increase memory usage and create narrower categories. - -`size`:: -(Optional, integer, default: `10`) -The number of buckets to return. - -[[bucket-categorize-text-agg-response]] -==== Response body - -`key`:: -(string) -Consists of the tokens (extracted by the `categorization_analyzer`) -that are common to all values of the input field included in the category. - -`doc_count`:: -(integer) -Number of documents matching the category. - -`max_matching_length`:: -(integer) -Categories from short messages containing few tokens may also match -categories containing many tokens derived from much longer messages. -`max_matching_length` is an indication of the maximum length of messages -that should be considered to belong to the category. When searching for -messages that match the category, any messages longer than -`max_matching_length` should be excluded. Use this field to prevent a -search for members of a category of short messages from matching much longer -ones. - -`regex`:: -(string) -A regular expression that will match all values of the input field included -in the category. 
It is possible that the `regex` does not incorporate every -term in `key`, if ordering varies between the values included in the -category. However, in simple cases the `regex` will be the ordered terms -concatenated into a regular expression that allows for arbitrary sections -in between them. It is not recommended to use the `regex` as the primary -mechanism for searching for the original documents that were categorized. -Search using a regular expression is very slow. Instead the terms in the -`key` field should be used to search for matching documents, as a terms -search can use the inverted index and hence be much faster. However, there -may be situations where it is useful to use the `regex` field to test whether -a small set of messages that have not been indexed match the category, or to -confirm that the terms in the `key` occur in the correct order in all the -matched documents. - -==== Basic use - -WARNING: Re-analyzing _large_ result sets will require a lot of time and memory. This aggregation should be - used in conjunction with <>. Additionally, you may consider - using the aggregation as a child of either the <> or - <> aggregation. - This will typically improve speed and memory use. - -Example: - -[source,console] --------------------------------------------------- -POST log-messages/_search?filter_path=aggregations -{ - "aggs": { - "categories": { - "categorize_text": { - "field": "message" - } - } - } -} --------------------------------------------------- -// TEST[setup:categorize_text] - -Response: - -[source,console-result] --------------------------------------------------- -{ - "aggregations" : { - "categories" : { - "buckets" : [ - { - "doc_count" : 3, - "key" : "Node shutting down", - "regex" : ".*?Node.+?shutting.+?down.*?", - "max_matching_length" : 49 - }, - { - "doc_count" : 1, - "key" : "Node starting up", - "regex" : ".*?Node.+?starting.+?up.*?", - "max_matching_length" : 47 - }, - { - "doc_count" : 1, - "key" : "User foo_325 logging on", - "regex" : ".*?User.+?foo_325.+?logging.+?on.*?", - "max_matching_length" : 52 - }, - { - "doc_count" : 1, - "key" : "User foo_864 logged off", - "regex" : ".*?User.+?foo_864.+?logged.+?off.*?", - "max_matching_length" : 52 - } - ] - } - } -} --------------------------------------------------- - -Here is an example using `categorization_filters` - -[source,console] --------------------------------------------------- -POST log-messages/_search?filter_path=aggregations -{ - "aggs": { - "categories": { - "categorize_text": { - "field": "message", - "categorization_filters": ["\\w+\\_\\d{3}"] <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:categorize_text] - -<1> The filters to apply to the analyzed tokens. It filters - out tokens like `bar_123`. 
- -Note how the `foo_` tokens are not part of the -category results - -[source,console-result] --------------------------------------------------- -{ - "aggregations" : { - "categories" : { - "buckets" : [ - { - "doc_count" : 3, - "key" : "Node shutting down", - "regex" : ".*?Node.+?shutting.+?down.*?", - "max_matching_length" : 49 - }, - { - "doc_count" : 1, - "key" : "Node starting up", - "regex" : ".*?Node.+?starting.+?up.*?", - "max_matching_length" : 47 - }, - { - "doc_count" : 1, - "key" : "User logged off", - "regex" : ".*?User.+?logged.+?off.*?", - "max_matching_length" : 52 - }, - { - "doc_count" : 1, - "key" : "User logging on", - "regex" : ".*?User.+?logging.+?on.*?", - "max_matching_length" : 52 - } - ] - } - } -} --------------------------------------------------- - -Here is an example using `categorization_filters`. -The default analyzer uses the `ml_standard` tokenizer which is similar to a whitespace tokenizer -but filters out tokens that could be interpreted as hexadecimal numbers. The default analyzer -also uses the `first_line_with_letters` character filter, so that only the first meaningful line -of multi-line messages is considered. -But, it may be that a token is a known highly-variable token (formatted usernames, emails, etc.). In that case, it is good to supply -custom `categorization_filters` to filter out those tokens for better categories. These filters may also reduce memory usage as fewer -tokens are held in memory for the categories. (If there are sufficient examples of different usernames, emails, etc., then -categories will form that naturally discard them as variables, but for small input data where only one example exists this won't -happen.) - -[source,console] --------------------------------------------------- -POST log-messages/_search?filter_path=aggregations -{ - "aggs": { - "categories": { - "categorize_text": { - "field": "message", - "categorization_filters": ["\\w+\\_\\d{3}"], <1> - "similarity_threshold": 11 <2> - } - } - } -} --------------------------------------------------- -// TEST[setup:categorize_text] -<1> The filters to apply to the analyzed tokens. It filters -out tokens like `bar_123`. -<2> Require 11% of token weight to match before adding a message to an - existing category rather than creating a new one. - -The resulting categories are now very broad, merging the log groups. -(A `similarity_threshold` of 11% is generally too low. Settings over -50% are usually better.) - -[source,console-result] --------------------------------------------------- -{ - "aggregations" : { - "categories" : { - "buckets" : [ - { - "doc_count" : 4, - "key" : "Node", - "regex" : ".*?Node.*?", - "max_matching_length" : 49 - }, - { - "doc_count" : 2, - "key" : "User", - "regex" : ".*?User.*?", - "max_matching_length" : 52 - } - ] - } - } -} --------------------------------------------------- - -This aggregation can have both sub-aggregations and itself be a sub-aggregation. This allows gathering the top daily categories and the -top sample doc as below. 
- -[source,console] --------------------------------------------------- -POST log-messages/_search?filter_path=aggregations -{ - "aggs": { - "daily": { - "date_histogram": { - "field": "time", - "fixed_interval": "1d" - }, - "aggs": { - "categories": { - "categorize_text": { - "field": "message", - "categorization_filters": ["\\w+\\_\\d{3}"] - }, - "aggs": { - "hit": { - "top_hits": { - "size": 1, - "sort": ["time"], - "_source": "message" - } - } - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:categorize_text] - -[source,console-result] --------------------------------------------------- -{ - "aggregations" : { - "daily" : { - "buckets" : [ - { - "key_as_string" : "2016-02-07T00:00:00.000Z", - "key" : 1454803200000, - "doc_count" : 3, - "categories" : { - "buckets" : [ - { - "doc_count" : 2, - "key" : "Node shutting down", - "regex" : ".*?Node.+?shutting.+?down.*?", - "max_matching_length" : 49, - "hit" : { - "hits" : { - "total" : { - "value" : 2, - "relation" : "eq" - }, - "max_score" : null, - "hits" : [ - { - "_index" : "log-messages", - "_id" : "1", - "_score" : null, - "_source" : { - "message" : "2016-02-07T00:00:00+0000 Node 3 shutting down" - }, - "sort" : [ - 1454803260000 - ] - } - ] - } - } - }, - { - "doc_count" : 1, - "key" : "Node starting up", - "regex" : ".*?Node.+?starting.+?up.*?", - "max_matching_length" : 47, - "hit" : { - "hits" : { - "total" : { - "value" : 1, - "relation" : "eq" - }, - "max_score" : null, - "hits" : [ - { - "_index" : "log-messages", - "_id" : "2", - "_score" : null, - "_source" : { - "message" : "2016-02-07T00:00:00+0000 Node 5 starting up" - }, - "sort" : [ - 1454803320000 - ] - } - ] - } - } - } - ] - } - }, - { - "key_as_string" : "2016-02-08T00:00:00.000Z", - "key" : 1454889600000, - "doc_count" : 3, - "categories" : { - "buckets" : [ - { - "doc_count" : 1, - "key" : "Node shutting down", - "regex" : ".*?Node.+?shutting.+?down.*?", - "max_matching_length" : 49, - "hit" : { - "hits" : { - "total" : { - "value" : 1, - "relation" : "eq" - }, - "max_score" : null, - "hits" : [ - { - "_index" : "log-messages", - "_id" : "4", - "_score" : null, - "_source" : { - "message" : "2016-02-08T00:00:00+0000 Node 5 shutting down" - }, - "sort" : [ - 1454889660000 - ] - } - ] - } - } - }, - { - "doc_count" : 1, - "key" : "User logged off", - "regex" : ".*?User.+?logged.+?off.*?", - "max_matching_length" : 52, - "hit" : { - "hits" : { - "total" : { - "value" : 1, - "relation" : "eq" - }, - "max_score" : null, - "hits" : [ - { - "_index" : "log-messages", - "_id" : "6", - "_score" : null, - "_source" : { - "message" : "2016-02-08T00:00:00+0000 User foo_864 logged off" - }, - "sort" : [ - 1454889840000 - ] - } - ] - } - } - }, - { - "doc_count" : 1, - "key" : "User logging on", - "regex" : ".*?User.+?logging.+?on.*?", - "max_matching_length" : 52, - "hit" : { - "hits" : { - "total" : { - "value" : 1, - "relation" : "eq" - }, - "max_score" : null, - "hits" : [ - { - "_index" : "log-messages", - "_id" : "5", - "_score" : null, - "_source" : { - "message" : "2016-02-08T00:00:00+0000 User foo_325 logging on" - }, - "sort" : [ - 1454889720000 - ] - } - ] - } - } - } - ] - } - } - ] - } - } -} --------------------------------------------------- diff --git a/docs/reference/aggregations/bucket/children-aggregation.asciidoc b/docs/reference/aggregations/bucket/children-aggregation.asciidoc deleted file mode 100644 index c81d39f8edc9c..0000000000000 --- a/docs/reference/aggregations/bucket/children-aggregation.asciidoc +++ 
/dev/null @@ -1,226 +0,0 @@ -[[search-aggregations-bucket-children-aggregation]] -=== Children aggregation -++++ -Children -++++ - -A special single bucket aggregation that selects child documents that have the specified type, as defined in a <>. - -This aggregation has a single option: - -* `type` - The child type that should be selected. - -For example, let's say we have an index of questions and answers. The answer type has the following `join` field in the mapping: - -[source,console,id=children-aggregation-example] --------------------------------------------------- -PUT child_example -{ - "mappings": { - "properties": { - "join": { - "type": "join", - "relations": { - "question": "answer" - } - } - } - } -} --------------------------------------------------- - -The `question` document contain a tag field and the `answer` documents contain an owner field. With the `children` -aggregation the tag buckets can be mapped to the owner buckets in a single request even though the two fields exist in -two different kinds of documents. - -An example of a question document: - -[source,console] --------------------------------------------------- -PUT child_example/_doc/1 -{ - "join": { - "name": "question" - }, - "body": "
I have Windows 2003 server and i bought a new Windows 2008 server...", - "title": "Whats the best way to file transfer my site from server to a newer one?", - "tags": [ - "windows-server-2003", - "windows-server-2008", - "file-transfer" - ] -} --------------------------------------------------- -// TEST[continued] - -Examples of `answer` documents: - -[source,console] --------------------------------------------------- -PUT child_example/_doc/2?routing=1 -{ - "join": { - "name": "answer", - "parent": "1" - }, - "owner": { - "location": "Norfolk, United Kingdom", - "display_name": "Sam", - "id": 48 - }, - "body": "
Unfortunately you're pretty much limited to FTP...", - "creation_date": "2009-05-04T13:45:37.030" -} - -PUT child_example/_doc/3?routing=1&refresh -{ - "join": { - "name": "answer", - "parent": "1" - }, - "owner": { - "location": "Norfolk, United Kingdom", - "display_name": "Troll", - "id": 49 - }, - "body": "
Use Linux...", - "creation_date": "2009-05-05T13:45:37.030" -} --------------------------------------------------- -// TEST[continued] - -The following request can be built that connects the two together: - -[source,console] --------------------------------------------------- -POST child_example/_search?size=0 -{ - "aggs": { - "top-tags": { - "terms": { - "field": "tags.keyword", - "size": 10 - }, - "aggs": { - "to-answers": { - "children": { - "type" : "answer" <1> - }, - "aggs": { - "top-names": { - "terms": { - "field": "owner.display_name.keyword", - "size": 10 - } - } - } - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -<1> The `type` points to type / mapping with the name `answer`. - -The above example returns the top question tags and per tag the top answer owners. - -Possible response: - -[source,console-result] --------------------------------------------------- -{ - "took": 25, - "timed_out": false, - "_shards": { - "total": 1, - "successful": 1, - "skipped" : 0, - "failed": 0 - }, - "hits": { - "total" : { - "value": 3, - "relation": "eq" - }, - "max_score": null, - "hits": [] - }, - "aggregations": { - "top-tags": { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "file-transfer", - "doc_count": 1, <1> - "to-answers": { - "doc_count": 2, <2> - "top-names": { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "Sam", - "doc_count": 1 - }, - { - "key": "Troll", - "doc_count": 1 - } - ] - } - } - }, - { - "key": "windows-server-2003", - "doc_count": 1, <1> - "to-answers": { - "doc_count": 2, <2> - "top-names": { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "Sam", - "doc_count": 1 - }, - { - "key": "Troll", - "doc_count": 1 - } - ] - } - } - }, - { - "key": "windows-server-2008", - "doc_count": 1, <1> - "to-answers": { - "doc_count": 2, <2> - "top-names": { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "Sam", - "doc_count": 1 - }, - { - "key": "Troll", - "doc_count": 1 - } - ] - } - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 25/"took": $body.took/] - -<1> The number of question documents with the tag `file-transfer`, `windows-server-2003`, etc. -<2> The number of answer documents that are related to question documents with the tag `file-transfer`, `windows-server-2003`, etc. diff --git a/docs/reference/aggregations/bucket/composite-aggregation.asciidoc b/docs/reference/aggregations/bucket/composite-aggregation.asciidoc deleted file mode 100644 index ded01237c23c8..0000000000000 --- a/docs/reference/aggregations/bucket/composite-aggregation.asciidoc +++ /dev/null @@ -1,952 +0,0 @@ -[[search-aggregations-bucket-composite-aggregation]] -=== Composite aggregation -++++ -Composite -++++ - -WARNING: The composite aggregation is expensive. Load test your application -before deploying a composite aggregation in production. - -A multi-bucket aggregation that creates composite buckets from different sources. - -Unlike the other `multi-bucket` aggregations, you can use the `composite` -aggregation to paginate **all** buckets from a multi-level aggregation -efficiently. This aggregation provides a way to stream **all** buckets of a -specific aggregation, similar to what -<> does for documents. 
- -The composite buckets are built from the combinations of the -values extracted/created for each document and each combination is considered as -a composite bucket. - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /sales -{ - "mappings": { - "properties": { - "product": { - "type": "keyword" - }, - "timestamp": { - "type": "date" - }, - "price": { - "type": "long" - }, - "shop": { - "type": "keyword" - }, - "location": { - "type": "geo_point" - }, - "nested": { - "type": "nested", - "properties": { - "product": { - "type": "keyword" - }, - "timestamp": { - "type": "date" - }, - "price": { - "type": "long" - }, - "shop": { - "type": "keyword" - } - } - } - } - } -} - -POST /sales/_bulk?refresh -{"index":{"_id":0}} -{"product": "mad max", "price": "20", "timestamp": "2017-05-09T14:35"} -{"index":{"_id":1}} -{"product": "mad max", "price": "25", "timestamp": "2017-05-09T12:35"} -{"index":{"_id":2}} -{"product": "rocky", "price": "10", "timestamp": "2017-05-08T09:10"} -{"index":{"_id":3}} -{"product": "mad max", "price": "27", "timestamp": "2017-05-10T07:07"} -{"index":{"_id":4}} -{"product": "apocalypse now", "price": "10", "timestamp": "2017-05-11T08:35"} -------------------------------------------------- -// TESTSETUP - -////////////////////////// - -For example, consider the following document: - -[source,js] --------------------------------------------------- -{ - "keyword": ["foo", "bar"], - "number": [23, 65, 76] -} --------------------------------------------------- -// NOTCONSOLE - -Using `keyword` and `number` as source fields for the aggregation results in -the following composite buckets: - -[source,js] --------------------------------------------------- -{ "keyword": "foo", "number": 23 } -{ "keyword": "foo", "number": 65 } -{ "keyword": "foo", "number": 76 } -{ "keyword": "bar", "number": 23 } -{ "keyword": "bar", "number": 65 } -{ "keyword": "bar", "number": 76 } --------------------------------------------------- -// NOTCONSOLE - -==== Value sources - -The `sources` parameter defines the source fields to use when building -composite buckets. The order that the `sources` are defined controls the order -that the keys are returned. - -NOTE: You must use a unique name when defining `sources`. - -The `sources` parameter can be any of the following types: - -* <<_terms,Terms>> -* <<_histogram,Histogram>> -* <<_date_histogram,Date histogram>> -* <<_geotile_grid,GeoTile grid>> - -[[_terms]] -===== Terms - -The `terms` value source is similar to a simple `terms` aggregation. -The values are extracted from a field exactly like the `terms` aggregation. 
- -Example: - -[source,console,id=composite-aggregation-terms-field-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { "product": { "terms": { "field": "product" } } } - ] - } - } - } -} --------------------------------------------------- - -Like the `terms` aggregation, it's possible to use a -<> to create values for the composite buckets: - -[source,console,id=composite-aggregation-terms-runtime-field-example] ----- -GET /_search -{ - "runtime_mappings": { - "day_of_week": { - "type": "keyword", - "script": """ - emit(doc['timestamp'].value.dayOfWeekEnum - .getDisplayName(TextStyle.FULL, Locale.ENGLISH)) - """ - } - }, - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { - "dow": { - "terms": { "field": "day_of_week" } - } - } - ] - } - } - } -} ----- - -//// -[source,console-result] ----- -{ - "timed_out": false, - "took": "$body.took", - "_shards": { - "total": 1, - "successful": 1, - "failed": 0, - "skipped": 0 - }, - "hits": "$body.hits", - "aggregations": { - "my_buckets": { - "after_key": { "dow": "Wednesday" }, - "buckets": [ - { "key": { "dow": "Monday" }, "doc_count": 1 }, - { "key": { "dow": "Thursday" }, "doc_count": 1 }, - { "key": { "dow": "Tuesday" }, "doc_count": 2 }, - { "key": { "dow": "Wednesday" }, "doc_count": 1 } - ] - } - } -} ----- -//// - -Although similar, the `terms` value source doesn't support the same set of -parameters as the `terms` aggregation. For other supported value source -parameters, see: - -* <<_order>> -* <<_missing_bucket>> - -[[_histogram]] -===== Histogram - -The `histogram` value source can be applied on numeric values to build fixed size -interval over the values. The `interval` parameter defines how the numeric values should be -transformed. For instance an `interval` set to 5 will translate any numeric values to its closest interval, -a value of `101` would be translated to `100` which is the key for the interval between 100 and 105. 
- -Example: - -[source,console,id=composite-aggregation-histogram-field-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { "histo": { "histogram": { "field": "price", "interval": 5 } } } - ] - } - } - } -} --------------------------------------------------- - -Like the `histogram` aggregation it's possible to use a -<> to create values for the composite buckets: - -[source,console,id=composite-aggregation-histogram-runtime-field-example] ----- -GET /_search -{ - "runtime_mappings": { - "price.discounted": { - "type": "double", - "script": """ - double price = doc['price'].value; - if (doc['product'].value == 'mad max') { - price *= 0.8; - } - emit(price); - """ - } - }, - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { - "price": { - "histogram": { - "interval": 5, - "field": "price.discounted" - } - } - } - ] - } - } - } -} ----- - -//// -[source,console-result] ----- -{ - "timed_out": false, - "took": "$body.took", - "_shards": { - "total": 1, - "successful": 1, - "failed": 0, - "skipped": 0 - }, - "hits": "$body.hits", - "aggregations": { - "my_buckets": { - "after_key": { "price": 20.0 }, - "buckets": [ - { "key": { "price": 10.0 }, "doc_count": 2 }, - { "key": { "price": 15.0 }, "doc_count": 1 }, - { "key": { "price": 20.0 }, "doc_count": 2 } - ] - } - } -} ----- -//// - -[[_date_histogram]] -===== Date histogram - -The `date_histogram` is similar to the `histogram` value source except that the interval -is specified by date/time expression: - -[source,console,id=composite-aggregation-datehistogram-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d" } } } - ] - } - } - } -} --------------------------------------------------- - -The example above creates an interval per day and translates all `timestamp` values to the start of its closest intervals. -Available expressions for interval: `year`, `quarter`, `month`, `week`, `day`, `hour`, `minute`, `second` - -Time values can also be specified via abbreviations supported by <> parsing. -Note that fractional time values are not supported, but you can address this by shifting to another -time unit (e.g., `1.5h` could instead be specified as `90m`). - -*Format* - -Internally, a date is represented as a 64 bit number representing a timestamp in milliseconds-since-the-epoch. -These timestamps are returned as the bucket keys. It is possible to return a formatted date string instead using -the format specified with the format parameter: - -[source,console,id=composite-aggregation-datehistogram-format-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { - "date": { - "date_histogram": { - "field": "timestamp", - "calendar_interval": "1d", - "format": "yyyy-MM-dd" <1> - } - } - } - ] - } - } - } -} --------------------------------------------------- - -<1> Supports expressive date <> - -*Time Zone* - -Date-times are stored in Elasticsearch in UTC. By default, all bucketing and -rounding is also done in UTC. The `time_zone` parameter can be used to indicate -that bucketing should use a different time zone. - -Time zones may either be specified as an ISO 8601 UTC offset (e.g. 
`+01:00` or -`-08:00`) or as a timezone id, an identifier used in the TZ database like -`America/Los_Angeles`. - -*Offset* - -include::datehistogram-aggregation.asciidoc[tag=offset-explanation] - -[source,console,id=composite-aggregation-datehistogram-offset-example] ----- -PUT my-index-000001/_doc/1?refresh -{ - "date": "2015-10-01T05:30:00Z" -} - -PUT my-index-000001/_doc/2?refresh -{ - "date": "2015-10-01T06:30:00Z" -} - -GET my-index-000001/_search?size=0 -{ - "aggs": { - "my_buckets": { - "composite" : { - "sources" : [ - { - "date": { - "date_histogram" : { - "field": "date", - "calendar_interval": "day", - "offset": "+6h", - "format": "iso8601" - } - } - } - ] - } - } - } -} ----- - -include::datehistogram-aggregation.asciidoc[tag=offset-result-intro] - -[source,console-result] ----- -{ - ... - "aggregations": { - "my_buckets": { - "after_key": { "date": "2015-10-01T06:00:00.000Z" }, - "buckets": [ - { - "key": { "date": "2015-09-30T06:00:00.000Z" }, - "doc_count": 1 - }, - { - "key": { "date": "2015-10-01T06:00:00.000Z" }, - "doc_count": 1 - } - ] - } - } -} ----- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -include::datehistogram-aggregation.asciidoc[tag=offset-note] - -[[_geotile_grid]] -===== GeoTile grid - -The `geotile_grid` value source works on `geo_point` fields and groups points into buckets that represent -cells in a grid. The resulting grid can be sparse and only contains cells -that have matching data. Each cell corresponds to a -{wikipedia}/Tiled_web_map[map tile] as used by many online map -sites. Each cell is labeled using a "{zoom}/{x}/{y}" format, where zoom is equal -to the user-specified precision. - -[source,console,id=composite-aggregation-geotilegrid-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { "tile": { "geotile_grid": { "field": "location", "precision": 8 } } } - ] - } - } - } -} --------------------------------------------------- - -*Precision* - -The highest-precision geotile of length 29 produces cells that cover -less than 10cm by 10cm of land. This precision is uniquely suited for composite aggregations as each -tile does not have to be generated and loaded in memory. - -See https://wiki.openstreetmap.org/wiki/Zoom_levels[Zoom level documentation] -on how precision (zoom) correlates to size on the ground. Precision for this -aggregation can be between 0 and 29, inclusive. - -*Bounding box filtering* - -The geotile source can optionally be constrained to a specific geo bounding box, which reduces -the range of tiles used. These bounds are useful when only a specific part of a geographical area needs high -precision tiling. - -[source,console,id=composite-aggregation-geotilegrid-boundingbox-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { - "tile": { - "geotile_grid": { - "field": "location", - "precision": 22, - "bounds": { - "top_left": "POINT (4.9 52.4)", - "bottom_right": "POINT (5.0 52.3)" - } - } - } - } - ] - } - } - } -} --------------------------------------------------- - -===== Mixing different value sources - -The `sources` parameter accepts an array of value sources. -It is possible to mix different value sources to create composite buckets. 
-For example: - -[source,console,id=composite-aggregation-mixing-sources-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d" } } }, - { "product": { "terms": { "field": "product" } } } - ] - } - } - } -} --------------------------------------------------- - -This will create composite buckets from the values created by two value sources, a `date_histogram` and a `terms`. -Each bucket is composed of two values, one for each value source defined in the aggregation. -Any type of combinations is allowed and the order in the array is preserved -in the composite buckets. - -[source,console,id=composite-aggregation-mixing-three-sources-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { "shop": { "terms": { "field": "shop" } } }, - { "product": { "terms": { "field": "product" } } }, - { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d" } } } - ] - } - } - } -} --------------------------------------------------- - -[[_order]] -==== Order - -By default the composite buckets are sorted by their natural ordering. Values are sorted -in ascending order of their values. When multiple value sources are requested, the ordering is done per value -source, the first value of the composite bucket is compared to the first value of the other composite bucket and if they are equals the -next values in the composite bucket are used for tie-breaking. This means that the composite bucket - `[foo, 100]` is considered smaller than `[foobar, 0]` because `foo` is considered smaller than `foobar`. -It is possible to define the direction of the sort for each value source by setting `order` to `asc` (default value) -or `desc` (descending order) directly in the value source definition. -For example: - -[source,console,id=composite-aggregation-order-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d", "order": "desc" } } }, - { "product": { "terms": { "field": "product", "order": "asc" } } } - ] - } - } - } -} --------------------------------------------------- - -\... will sort the composite bucket in descending order when comparing values from the `date_histogram` source -and in ascending order when comparing values from the `terms` source. - -[[_missing_bucket]] -==== Missing bucket - -By default documents without a value for a given source are ignored. -It is possible to include them in the response by setting `missing_bucket` to -`true` (defaults to `false`): - -[source,console,id=composite-aggregation-missing-bucket-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [{ - "product_name": { - "terms": { - "field": "product", - "missing_bucket": true, - "missing_order": "last" - } - } - }] - } - } - } -} --------------------------------------------------- - -In the above example, the `product_name` source emits an explicit `null` bucket -for documents without a `product` value. This bucket is placed last. - -You can control the position of the `null` bucket using the optional -`missing_order` parameter. 
If `missing_order` is `first` or `last`, the `null` -bucket is placed in the respective first or last position. If `missing_order` is -omitted or `default`, the source's `order` determines the bucket's position. If -`order` is `asc` (ascending), the bucket is in the first position. If `order` is -`desc` (descending), the bucket is in the last position. - -==== Size - -The `size` parameter can be set to define how many composite buckets should be returned. -Each composite bucket is considered as a single bucket, so setting a size of 10 will return the -first 10 composite buckets created from the value sources. -The response contains the values for each composite bucket in an array containing the values extracted -from each value source. Defaults to `10`. - -==== Pagination - -If the number of composite buckets is too high (or unknown) to be returned in a single response -it is possible to split the retrieval in multiple requests. -Since the composite buckets are flat by nature, the requested `size` is exactly the number of composite buckets -that will be returned in the response (assuming that they are at least `size` composite buckets to return). -If all composite buckets should be retrieved it is preferable to use a small size (`100` or `1000` for instance) -and then use the `after` parameter to retrieve the next results. -For example: - -[source,console,id=composite-aggregation-after-key-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "size": 2, - "sources": [ - { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d" } } }, - { "product": { "terms": { "field": "product" } } } - ] - } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search\?filter_path=aggregations/] - -\... returns: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "my_buckets": { - "after_key": { - "date": 1494288000000, - "product": "mad max" - }, - "buckets": [ - { - "key": { - "date": 1494201600000, - "product": "rocky" - }, - "doc_count": 1 - }, - { - "key": { - "date": 1494288000000, - "product": "mad max" - }, - "doc_count": 2 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] - -To get the next set of buckets, resend the same aggregation with the `after` -parameter set to the `after_key` value returned in the response. -For example, this request uses the `after_key` value provided in the previous response: - -[source,console,id=composite-aggregation-after-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "size": 2, - "sources": [ - { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d", "order": "desc" } } }, - { "product": { "terms": { "field": "product", "order": "asc" } } } - ], - "after": { "date": 1494288000000, "product": "mad max" } <1> - } - } - } -} --------------------------------------------------- - -<1> Should restrict the aggregation to buckets that sort **after** the provided values. - -NOTE: The `after_key` is *usually* the key to the last bucket returned in -the response, but that isn't guaranteed. Always use the returned `after_key` instead -of deriving it from the buckets. 
- -==== Early termination - -For optimal performance the <> should be set on the index so that it matches -parts or fully the source order in the composite aggregation. -For instance the following index sort: - -[source,console] --------------------------------------------------- -PUT my-index-000001 -{ - "settings": { - "index": { - "sort.field": [ "username", "timestamp" ], <1> - "sort.order": [ "asc", "desc" ] <2> - } - }, - "mappings": { - "properties": { - "username": { - "type": "keyword", - "doc_values": true - }, - "timestamp": { - "type": "date" - } - } - } -} --------------------------------------------------- - -<1> This index is sorted by `username` first then by `timestamp`. -<2> ... in ascending order for the `username` field and in descending order for the `timestamp` field. - -.. could be used to optimize these composite aggregations: - -[source,console] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { "user_name": { "terms": { "field": "user_name" } } } <1> - ] - } - } - } -} --------------------------------------------------- - -<1> `user_name` is a prefix of the index sort and the order matches (`asc`). - -[source,console] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { "user_name": { "terms": { "field": "user_name" } } }, <1> - { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d", "order": "desc" } } } <2> - ] - } - } - } -} --------------------------------------------------- - -<1> `user_name` is a prefix of the index sort and the order matches (`asc`). -<2> `timestamp` matches also the prefix and the order matches (`desc`). - -In order to optimize the early termination it is advised to set `track_total_hits` in the request -to `false`. The number of total hits that match the request can be retrieved on the first request -and it would be costly to compute this number on every page: - -[source,console] --------------------------------------------------- -GET /_search -{ - "size": 0, - "track_total_hits": false, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { "user_name": { "terms": { "field": "user_name" } } }, - { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d", "order": "desc" } } } - ] - } - } - } -} --------------------------------------------------- - -Note that the order of the source is important, in the example below switching the `user_name` with the `timestamp` -would deactivate the sort optimization since this configuration wouldn't match the index sort specification. -If the order of sources do not matter for your use case you can follow these simple guidelines: - - * Put the fields with the highest cardinality first. - * Make sure that the order of the field matches the order of the index sort. - * Put multi-valued fields last since they cannot be used for early termination. - -WARNING: <> can slowdown indexing, it is very important to test index sorting -with your specific use case and dataset to ensure that it matches your requirement. If it doesn't note that `composite` -aggregations will also try to early terminate on non-sorted indices if the query matches all document (`match_all` query). - -==== Sub-aggregations - -Like any `multi-bucket` aggregations the `composite` aggregation can hold sub-aggregations. 
-These sub-aggregations can be used to compute other buckets or statistics on each composite bucket created by this -parent aggregation. -For instance the following example computes the average value of a field -per composite bucket: - -[source,console,id=composite-aggregation-subaggregations-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "my_buckets": { - "composite": { - "sources": [ - { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d", "order": "desc" } } }, - { "product": { "terms": { "field": "product" } } } - ] - }, - "aggregations": { - "the_avg": { - "avg": { "field": "price" } - } - } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search\?filter_path=aggregations/] - -\... returns: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "my_buckets": { - "after_key": { - "date": 1494201600000, - "product": "rocky" - }, - "buckets": [ - { - "key": { - "date": 1494460800000, - "product": "apocalypse now" - }, - "doc_count": 1, - "the_avg": { - "value": 10.0 - } - }, - { - "key": { - "date": 1494374400000, - "product": "mad max" - }, - "doc_count": 1, - "the_avg": { - "value": 27.0 - } - }, - { - "key": { - "date": 1494288000000, - "product": "mad max" - }, - "doc_count": 2, - "the_avg": { - "value": 22.5 - } - }, - { - "key": { - "date": 1494201600000, - "product": "rocky" - }, - "doc_count": 1, - "the_avg": { - "value": 10.0 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] - -[[search-aggregations-bucket-composite-aggregation-pipeline-aggregations]] -==== Pipeline aggregations - -The composite agg is not currently compatible with pipeline aggregations, nor does it make sense in most cases. -E.g. due to the paging nature of composite aggs, a single logical partition (one day for example) might be spread -over multiple pages. Since pipeline aggregations are purely post-processing on the final list of buckets, -running something like a derivative on a composite page could lead to inaccurate results as it is only taking into -account a "partial" result on that page. - -Pipeline aggs that are self contained to a single bucket (such as `bucket_selector`) might be supported in the future. diff --git a/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc deleted file mode 100644 index ef62f263a54a8..0000000000000 --- a/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc +++ /dev/null @@ -1,861 +0,0 @@ -[[search-aggregations-bucket-datehistogram-aggregation]] -=== Date histogram aggregation -++++ -Date histogram -++++ - -This multi-bucket aggregation is similar to the normal -<>, but it can -only be used with date or date range values. Because dates are represented internally in -Elasticsearch as long values, it is possible, but not as accurate, to use the -normal `histogram` on dates as well. The main difference in the two APIs is -that here the interval can be specified using date/time expressions. Time-based -data requires special support because time-based intervals are not always a -fixed length. - -Like the histogram, values are rounded *down* into the closest bucket. For -example, if the interval is a calendar day, `2020-01-03T07:00:01Z` is rounded to -`2020-01-03T00:00:00Z`. 
Values are rounded as follows: - -[source,java] ----- -bucket_key = Math.floor(value / interval) * interval ----- - -[[calendar_and_fixed_intervals]] -==== Calendar and fixed intervals - -When configuring a date histogram aggregation, the interval can be specified -in two manners: calendar-aware time intervals, and fixed time intervals. - -Calendar-aware intervals understand that daylight savings changes the length -of specific days, months have different amounts of days, and leap seconds can -be tacked onto a particular year. - -Fixed intervals are, by contrast, always multiples of SI units and do not change -based on calendaring context. - -[[calendar_intervals]] -==== Calendar intervals - -Calendar-aware intervals are configured with the `calendar_interval` parameter. -You can specify calendar intervals using the unit name, such as `month`, or as a -single unit quantity, such as `1M`. For example, `day` and `1d` are equivalent. -Multiple quantities, such as `2d`, are not supported. - -The accepted calendar intervals are: - -`minute`, `1m` :: - -All minutes begin at 00 seconds. -One minute is the interval between 00 seconds of the first minute and 00 -seconds of the following minute in the specified time zone, compensating for any -intervening leap seconds, so that the number of minutes and seconds past the -hour is the same at the start and end. - -`hour`, `1h` :: - -All hours begin at 00 minutes and 00 seconds. -One hour (1h) is the interval between 00:00 minutes of the first hour and 00:00 -minutes of the following hour in the specified time zone, compensating for any -intervening leap seconds, so that the number of minutes and seconds past the hour -is the same at the start and end. - -`day`, `1d` :: - -All days begin at the earliest possible time, which is usually 00:00:00 -(midnight). -One day (1d) is the interval between the start of the day and the start of -the following day in the specified time zone, compensating for any intervening -time changes. - -`week`, `1w` :: - -One week is the interval between the start day_of_week:hour:minute:second -and the same day of the week and time of the following week in the specified -time zone. - -`month`, `1M` :: - -One month is the interval between the start day of the month and time of -day and the same day of the month and time of the following month in the specified -time zone, so that the day of the month and time of day are the same at the start -and end. Note that the day may differ if an -<>. - -`quarter`, `1q` :: - -One quarter is the interval between the start day of the month and -time of day and the same day of the month and time of day three months later, -so that the day of the month and time of day are the same at the start and end. + - -`year`, `1y` :: - -One year is the interval between the start day of the month and time of -day and the same day of the month and time of day the following year in the -specified time zone, so that the date and time are the same at the start and end. 
+ - -[[calendar_interval_examples]] -===== Calendar interval examples -As an example, here is an aggregation requesting bucket intervals of a month in calendar time: - -[source,console,id=datehistogram-aggregation-calendar-interval-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "sales_over_time": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -If you attempt to use multiples of calendar units, the aggregation will fail because only -singular calendar units are supported: - -[source,console,id=datehistogram-aggregation-calendar-interval-multiples-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "sales_over_time": { - "date_histogram": { - "field": "date", - "calendar_interval": "2d" - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] -// TEST[catch:bad_request] - -[source,js] --------------------------------------------------- -{ - "error" : { - "root_cause" : [...], - "type" : "x_content_parse_exception", - "reason" : "[1:82] [date_histogram] failed to parse field [calendar_interval]", - "caused_by" : { - "type" : "illegal_argument_exception", - "reason" : "The supplied interval [2d] could not be parsed as a calendar interval.", - "stack_trace" : "java.lang.IllegalArgumentException: The supplied interval [2d] could not be parsed as a calendar interval." - } - } -} - --------------------------------------------------- -// NOTCONSOLE - -[[fixed_intervals]] -==== Fixed intervals - -Fixed intervals are configured with the `fixed_interval` parameter. - -In contrast to calendar-aware intervals, fixed intervals are a fixed number of SI -units and never deviate, regardless of where they fall on the calendar. One second -is always composed of `1000ms`. This allows fixed intervals to be specified in -any multiple of the supported units. - -However, it means fixed intervals cannot express other units such as months, -since the duration of a month is not a fixed quantity. Attempting to specify -a calendar interval like month or quarter will throw an exception. - -The accepted units for fixed intervals are: - -milliseconds (`ms`) :: -A single millisecond. This is a very, very small interval. - -seconds (`s`) :: -Defined as 1000 milliseconds each. - -minutes (`m`) :: -Defined as 60 seconds each (60,000 milliseconds). -All minutes begin at 00 seconds. - -hours (`h`) :: -Defined as 60 minutes each (3,600,000 milliseconds). -All hours begin at 00 minutes and 00 seconds. - -days (`d`) :: -Defined as 24 hours (86,400,000 milliseconds). -All days begin at the earliest possible time, which is usually 00:00:00 -(midnight). 
- -[[fixed_interval_examples]] -===== Fixed interval examples - -If we try to recreate the "month" `calendar_interval` from earlier, we can approximate that with -30 fixed days: - -[source,console,id=datehistogram-aggregation-fixed-interval-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "sales_over_time": { - "date_histogram": { - "field": "date", - "fixed_interval": "30d" - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -But if we try to use a calendar unit that is not supported, such as weeks, we'll get an exception: - -[source,console,id=datehistogram-aggregation-fixed-interval-unsupported-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "sales_over_time": { - "date_histogram": { - "field": "date", - "fixed_interval": "2w" - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] -// TEST[catch:bad_request] - -[source,js] --------------------------------------------------- -{ - "error" : { - "root_cause" : [...], - "type" : "x_content_parse_exception", - "reason" : "[1:82] [date_histogram] failed to parse field [fixed_interval]", - "caused_by" : { - "type" : "illegal_argument_exception", - "reason" : "failed to parse setting [date_histogram.fixedInterval] with value [2w] as a time value: unit is missing or unrecognized", - "stack_trace" : "java.lang.IllegalArgumentException: failed to parse setting [date_histogram.fixedInterval] with value [2w] as a time value: unit is missing or unrecognized" - } - } -} - --------------------------------------------------- -// NOTCONSOLE - -[[datehistogram-aggregation-notes]] -==== Date histogram usage notes - -In all cases, when the specified end time does not exist, the actual end time is -the closest available time after the specified end. - -Widely distributed applications must also consider vagaries such as countries that -start and stop daylight savings time at 12:01 A.M., so end up with one minute of -Sunday followed by an additional 59 minutes of Saturday once a year, and countries -that decide to move across the international date line. Situations like -that can make irregular time zone offsets seem easy. - -As always, rigorous testing, especially around time-change events, will ensure -that your time interval specification is -what you intend it to be. - -WARNING: To avoid unexpected results, all connected servers and clients must -sync to a reliable network time service. - -NOTE: Fractional time values are not supported, but you can address this by -shifting to another time unit (e.g., `1.5h` could instead be specified as `90m`). - -NOTE: You can also specify time values using abbreviations supported by -<> parsing. - -[[datehistogram-aggregation-keys]] -==== Keys - -Internally, a date is represented as a 64 bit number representing a timestamp -in milliseconds-since-the-epoch (01/01/1970 midnight UTC). These timestamps are -returned as the ++key++ name of the bucket. The `key_as_string` is the same -timestamp converted to a formatted -date string using the `format` parameter specification: - -TIP: If you don't specify `format`, the first date -<> specified in the field mapping is used. 
- -[source,console,id=datehistogram-aggregation-format-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "sales_over_time": { - "date_histogram": { - "field": "date", - "calendar_interval": "1M", - "format": "yyyy-MM-dd" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> Supports expressive date <> - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "sales_over_time": { - "buckets": [ - { - "key_as_string": "2015-01-01", - "key": 1420070400000, - "doc_count": 3 - }, - { - "key_as_string": "2015-02-01", - "key": 1422748800000, - "doc_count": 2 - }, - { - "key_as_string": "2015-03-01", - "key": 1425168000000, - "doc_count": 2 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -[[datehistogram-aggregation-time-zone]] -==== Time zone - -{es} stores date-times in Coordinated Universal Time (UTC). By default, all bucketing and -rounding is also done in UTC. Use the `time_zone` parameter to indicate -that bucketing should use a different time zone. - -When you specify a time zone, the following logic is used to determine the bucket the document belongs in: - -[source,java] ----- -bucket_key = localToUtc(Math.floor(utcToLocal(value) / interval) * interval)) ----- - -For example, if the interval is a calendar day and the time zone is -`America/New_York`, then the date value `2020-01-03T01:00:01Z` is processed as follows: - -. Converted to EST: `2020-01-02T20:00:01` -. Rounded down to the nearest interval: `2020-01-02T00:00:00` -. Converted back to UTC: `2020-01-02T05:00:00:00Z` - -When a `key_as_string` is generated for the bucket, the key value is stored in `America/New_York` time, so it'll display as `"2020-01-02T00:00:00"`. - -You can specify time zones as an ISO 8601 UTC offset, such as `+01:00` or -`-08:00`, or as an IANA time zone ID, -such as `America/Los_Angeles`. - -Consider the following example: - -[source,console,id=datehistogram-aggregation-timezone-example] ---------------------------------- -PUT my-index-000001/_doc/1?refresh -{ - "date": "2015-10-01T00:30:00Z" -} - -PUT my-index-000001/_doc/2?refresh -{ - "date": "2015-10-01T01:30:00Z" -} - -GET my-index-000001/_search?size=0 -{ - "aggs": { - "by_day": { - "date_histogram": { - "field": "date", - "calendar_interval": "day" - } - } - } -} ---------------------------------- - -If you don't specify a time zone, UTC is used. This would result in both of these -documents being placed into the same day bucket, which starts at midnight UTC -on 1 October 2015: - -[source,console-result] ---------------------------------- -{ - ... 
- "aggregations": { - "by_day": { - "buckets": [ - { - "key_as_string": "2015-10-01T00:00:00.000Z", - "key": 1443657600000, - "doc_count": 2 - } - ] - } - } -} ---------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -If you specify a `time_zone` of `-01:00`, midnight in that time zone is one hour -before midnight UTC: - -[source,console] ---------------------------------- -GET my-index-000001/_search?size=0 -{ - "aggs": { - "by_day": { - "date_histogram": { - "field": "date", - "calendar_interval": "day", - "time_zone": "-01:00" - } - } - } -} ---------------------------------- -// TEST[continued] - -Now the first document falls into the bucket for 30 September 2015, while the -second document falls into the bucket for 1 October 2015: - -[source,console-result] ---------------------------------- -{ - ... - "aggregations": { - "by_day": { - "buckets": [ - { - "key_as_string": "2015-09-30T00:00:00.000-01:00", <1> - "key": 1443574800000, - "doc_count": 1 - }, - { - "key_as_string": "2015-10-01T00:00:00.000-01:00", <1> - "key": 1443661200000, - "doc_count": 1 - } - ] - } - } -} ---------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -<1> The `key_as_string` value represents midnight on each day - in the specified time zone. - -WARNING: Many time zones shift their clocks for daylight savings time. Buckets -close to the moment when those changes happen can have slightly different sizes -than you would expect from the `calendar_interval` or `fixed_interval`. -For example, consider a DST start in the `CET` time zone: on 27 March 2016 at 2am, -clocks were turned forward 1 hour to 3am local time. If you use `day` as the -`calendar_interval`, the bucket covering that day will only hold data for 23 -hours instead of the usual 24 hours for other buckets. The same is true for -shorter intervals, like a `fixed_interval` of `12h`, where you'll have only a 11h -bucket on the morning of 27 March when the DST shift happens. - -[[search-aggregations-bucket-datehistogram-offset]] -==== Offset - -// tag::offset-explanation[] -Use the `offset` parameter to change the start value of each bucket by the -specified positive (`+`) or negative offset (`-`) duration, such as `1h` for -an hour, or `1d` for a day. See <> for more possible time -duration options. - -For example, when using an interval of `day`, each bucket runs from midnight -to midnight. Setting the `offset` parameter to `+6h` changes each bucket -to run from 6am to 6am: -// end::offset-explanation[] - -[source,console,id=datehistogram-aggregation-offset-example] ------------------------------ -PUT my-index-000001/_doc/1?refresh -{ - "date": "2015-10-01T05:30:00Z" -} - -PUT my-index-000001/_doc/2?refresh -{ - "date": "2015-10-01T06:30:00Z" -} - -GET my-index-000001/_search?size=0 -{ - "aggs": { - "by_day": { - "date_histogram": { - "field": "date", - "calendar_interval": "day", - "offset": "+6h" - } - } - } -} ------------------------------ - -// tag::offset-result-intro[] -Instead of a single bucket starting at midnight, the above request groups the -documents into buckets starting at 6am: -// end::offset-result-intro[] - -[source,console-result] ------------------------------ -{ - ... 
- "aggregations": { - "by_day": { - "buckets": [ - { - "key_as_string": "2015-09-30T06:00:00.000Z", - "key": 1443592800000, - "doc_count": 1 - }, - { - "key_as_string": "2015-10-01T06:00:00.000Z", - "key": 1443679200000, - "doc_count": 1 - } - ] - } - } -} ------------------------------ -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -// tag::offset-note[] -NOTE: The start `offset` of each bucket is calculated after `time_zone` -adjustments have been made. -// end::offset-note[] - -[[search-aggregations-bucket-datehistogram-offset-months]] -===== Long offsets over calendar intervals - -It is typical to use offsets in units smaller than the `calendar_interval`. For example, -using offsets in hours when the interval is days, or an offset of days when the interval is months. -If the calendar interval is always of a standard length, or the `offset` is less than one unit of the calendar -interval (for example less than `+24h` for `days` or less than `+28d` for months), -then each bucket will have a repeating start. For example `+6h` for `days` will result in all buckets -starting at 6am each day. However, `+30h` will also result in buckets starting at 6am, except when crossing -days that change from standard to summer-savings time or vice-versa. - -This situation is much more pronounced for months, where each month has a different length -to at least one of its adjacent months. -To demonstrate this, consider eight documents each with a date field on the 20th day of each of the -eight months from January to August of 2022. - -When querying for a date histogram over the calendar interval of months, the response will return one bucket per month, each with a single document. -Each bucket will have a key named after the first day of the month, plus any offset. -For example, the offset of `+19d` will result in buckets with names like `2022-01-20`. - -[source,console,id=datehistogram-aggregation-offset-example-19d] --------------------------------------------------- -"buckets": [ - { "key_as_string": "2022-01-20", "key": 1642636800000, "doc_count": 1 }, - { "key_as_string": "2022-02-20", "key": 1645315200000, "doc_count": 1 }, - { "key_as_string": "2022-03-20", "key": 1647734400000, "doc_count": 1 }, - { "key_as_string": "2022-04-20", "key": 1650412800000, "doc_count": 1 }, - { "key_as_string": "2022-05-20", "key": 1653004800000, "doc_count": 1 }, - { "key_as_string": "2022-06-20", "key": 1655683200000, "doc_count": 1 }, - { "key_as_string": "2022-07-20", "key": 1658275200000, "doc_count": 1 }, - { "key_as_string": "2022-08-20", "key": 1660953600000, "doc_count": 1 } -] --------------------------------------------------- -// TESTRESPONSE[skip:no setup made for this example yet] - -Increasing the offset to `+20d`, each document will appear in a bucket for the previous month, -with all bucket keys ending with the same day of the month, as normal. -However, further increasing to `+28d`, -what used to be a February bucket has now become `"2022-03-01"`. 
- -[source,console,id=datehistogram-aggregation-offset-example-28d] --------------------------------------------------- -"buckets": [ - { "key_as_string": "2021-12-29", "key": 1640736000000, "doc_count": 1 }, - { "key_as_string": "2022-01-29", "key": 1643414400000, "doc_count": 1 }, - { "key_as_string": "2022-03-01", "key": 1646092800000, "doc_count": 1 }, - { "key_as_string": "2022-03-29", "key": 1648512000000, "doc_count": 1 }, - { "key_as_string": "2022-04-29", "key": 1651190400000, "doc_count": 1 }, - { "key_as_string": "2022-05-29", "key": 1653782400000, "doc_count": 1 }, - { "key_as_string": "2022-06-29", "key": 1656460800000, "doc_count": 1 }, - { "key_as_string": "2022-07-29", "key": 1659052800000, "doc_count": 1 } -] --------------------------------------------------- -// TESTRESPONSE[skip:no setup made for this example yet] - -If we continue to increase the offset, the 30-day months will also shift into the next month, -so that 3 of the 8 buckets have different days than the other five. -In fact if we keep going, we will find cases where two documents appear in the same month. -Documents that were originally 30 days apart can be shifted into the same 31-day month bucket. - -For example, for `+50d` we see: - -[source,console,id=datehistogram-aggregation-offset-example-50d] --------------------------------------------------- -"buckets": [ - { "key_as_string": "2022-01-20", "key": 1642636800000, "doc_count": 1 }, - { "key_as_string": "2022-02-20", "key": 1645315200000, "doc_count": 2 }, - { "key_as_string": "2022-04-20", "key": 1650412800000, "doc_count": 2 }, - { "key_as_string": "2022-06-20", "key": 1655683200000, "doc_count": 2 }, - { "key_as_string": "2022-08-20", "key": 1660953600000, "doc_count": 1 } -] --------------------------------------------------- -// TESTRESPONSE[skip:no setup made for this example yet] - -It is therefore always important when using `offset` with `calendar_interval` bucket sizes -to understand the consequences of using offsets larger than the interval size. - -More examples: - -* If the goal is to, for example, have an annual histogram where each year starts on the 5th February, -you could use `calendar_interval` of `year` and `offset` of `+33d`, and each year will be shifted identically, -because the offset includes only January, which is the same length every year. -However, if the goal is to have the year start on the 5th March instead, this technique will not work because -the offset includes February, which changes length every four years. -* If you want a quarterly histogram starting on a date within the first month of the year, it will work, -but as soon as you push the start date into the second month by having an offset longer than a month, the -quarters will all start on different dates. - -[[date-histogram-keyed-response]] -==== Keyed response - -Setting the `keyed` flag to `true` associates a unique string key with each -bucket and returns the ranges as a hash rather than an array: - -[source,console,id=datehistogram-aggregation-keyed-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "sales_over_time": { - "date_histogram": { - "field": "date", - "calendar_interval": "1M", - "format": "yyyy-MM-dd", - "keyed": true - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "sales_over_time": { - "buckets": { - "2015-01-01": { - "key_as_string": "2015-01-01", - "key": 1420070400000, - "doc_count": 3 - }, - "2015-02-01": { - "key_as_string": "2015-02-01", - "key": 1422748800000, - "doc_count": 2 - }, - "2015-03-01": { - "key_as_string": "2015-03-01", - "key": 1425168000000, - "doc_count": 2 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -[[date-histogram-scripts]] -==== Scripts - -If the data in your documents doesn't exactly match what you'd like to aggregate, -use a <> . For example, if the revenue -for promoted sales should be recognized a day after the sale date: - -[source,console,id=datehistogram-aggregation-runtime-field] ----- -POST /sales/_search?size=0 -{ - "runtime_mappings": { - "date.promoted_is_tomorrow": { - "type": "date", - "script": """ - long date = doc['date'].value.toInstant().toEpochMilli(); - if (doc['promoted'].value) { - date += 86400; - } - emit(date); - """ - } - }, - "aggs": { - "sales_over_time": { - "date_histogram": { - "field": "date.promoted_is_tomorrow", - "calendar_interval": "1M" - } - } - } -} ----- -// TEST[setup:sales] - -//// - -[source,console-result] ----- -{ - ... - "aggregations": { - "sales_over_time": { - "buckets": [ - { - "key_as_string": "2015-01-01T00:00:00.000Z", - "key": 1420070400000, - "doc_count": 3 - }, - { - "key_as_string": "2015-02-01T00:00:00.000Z", - "key": 1422748800000, - "doc_count": 2 - }, - { - "key_as_string": "2015-03-01T00:00:00.000Z", - "key": 1425168000000, - "doc_count": 2 - } - ] - } - } -} ----- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -//// - -[[date-histogram-params]] -==== Parameters - -You can control the order of the returned -buckets using the `order` -settings and filter the returned buckets based on a `min_doc_count` setting -(by default all buckets between the first -bucket that matches documents and the last one are returned). This histogram -also supports the `extended_bounds` -setting, which enables extending the bounds of the histogram beyond the data -itself, and `hard_bounds` that limits the histogram to specified bounds. -For more information, see -<> and -<>. - -[[date-histogram-missing-value]] -===== Missing value - -The `missing` parameter defines how to treat documents that are missing a value. -By default, they are ignored, but it is also possible to treat them as if they -have a value. - -[source,console,id=datehistogram-aggregation-missing-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "sale_date": { - "date_histogram": { - "field": "date", - "calendar_interval": "year", - "missing": "2000/01/01" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> Documents without a value in the `date` field will fall into the -same bucket as documents that have the value `2000-01-01`. - -[[date-histogram-order]] -===== Order - -By default the returned buckets are sorted by their `key` ascending, but you can -control the order using -the `order` setting. This setting supports the same `order` functionality as -<>. 
- -[[date-histogram-aggregate-scripts]] -===== Using a script to aggregate by day of the week - -When you need to aggregate the results by day of the week, run a `terms` -aggregation on a <> that returns the day of the week: - -[source,console,id=datehistogram-aggregation-day-of-week-runtime-field] ----- -POST /sales/_search?size=0 -{ - "runtime_mappings": { - "date.day_of_week": { - "type": "keyword", - "script": "emit(doc['date'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH))" - } - }, - "aggs": { - "day_of_week": { - "terms": { "field": "date.day_of_week" } - } - } -} ----- -// TEST[setup:sales] - -Response: - -[source,console-result] ----- -{ - ... - "aggregations": { - "day_of_week": { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "Sunday", - "doc_count": 4 - }, - { - "key": "Thursday", - "doc_count": 3 - } - ] - } - } -} ----- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -The response will contain all the buckets having the relative day of -the week as key : 1 for Monday, 2 for Tuesday... 7 for Sunday. diff --git a/docs/reference/aggregations/bucket/daterange-aggregation.asciidoc b/docs/reference/aggregations/bucket/daterange-aggregation.asciidoc deleted file mode 100644 index 3fa48523ab6ed..0000000000000 --- a/docs/reference/aggregations/bucket/daterange-aggregation.asciidoc +++ /dev/null @@ -1,402 +0,0 @@ -[[search-aggregations-bucket-daterange-aggregation]] -=== Date range aggregation -++++ -Date range -++++ - -A range aggregation that is dedicated for date values. The main difference -between this aggregation and the normal -<> -aggregation is that the `from` and `to` values can be expressed in -<> expressions, and it is also possible to specify a date -format by which the `from` and `to` response fields will be returned. -Note that this aggregation includes the `from` value and excludes the `to` value -for each range. - -Example: - -[source,console,id=daterange-aggregation-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "range": { - "date_range": { - "field": "date", - "format": "MM-yyyy", - "ranges": [ - { "to": "now-10M/M" }, <1> - { "from": "now-10M/M" } <2> - ] - } - } - } -} --------------------------------------------------- -// TEST[setup:sales s/now-10M\/M/10-2015/] - -<1> < now minus 10 months, rounded down to the start of the month. -<2> >= now minus 10 months, rounded down to the start of the month. - -In the example above, we created two range buckets, the first will "bucket" all -documents dated prior to 10 months ago and the second will "bucket" all -documents dated since 10 months ago - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "range": { - "buckets": [ - { - "to": 1.4436576E12, - "to_as_string": "10-2015", - "doc_count": 7, - "key": "*-10-2015" - }, - { - "from": 1.4436576E12, - "from_as_string": "10-2015", - "doc_count": 0, - "key": "10-2015-*" - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -WARNING: If a format or date value is incomplete, the date range aggregation -replaces any missing components with default values. See -<>. - -==== Missing Values - -The `missing` parameter defines how documents that are missing a value should -be treated. 
By default they will be ignored but it is also possible to treat -them as if they had a value. This is done by adding a set of fieldname : -value mappings to specify default values per field. - -[source,console,id=daterange-aggregation-missing-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "range": { - "date_range": { - "field": "date", - "missing": "1976/11/30", - "ranges": [ - { - "key": "Older", - "to": "2016/02/01" - }, <1> - { - "key": "Newer", - "from": "2016/02/01", - "to" : "now/d" - } - ] - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> Documents without a value in the `date` field will be added to the "Older" -bucket, as if they had a date value of "1976-11-30". - -[[date-format-pattern]] -==== Date Format/Pattern - -NOTE: this information was copied from -https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html[DateTimeFormatter] - -All ASCII letters are reserved as format pattern letters, which are defined -as follows: - -[options="header"] -|======= -|Symbol |Meaning |Presentation |Examples -|G |era |text |AD; Anno Domini; A -|u |year |year |2004; 04 -|y |year-of-era |year |2004; 04 -|D |day-of-year |number |189 -|M/L |month-of-year |number/text |7; 07; Jul; July; J -|d |day-of-month |number |10 - -|Q/q |quarter-of-year |number/text |3; 03; Q3; 3rd quarter -|Y |week-based-year |year |1996; 96 -|w |week-of-week-based-year |number |27 -|W |week-of-month |number |4 -|E |day-of-week |text |Tue; Tuesday; T -|e/c |localized day-of-week |number/text |2; 02; Tue; Tuesday; T -|F |week-of-month |number |3 - -|a |am-pm-of-day |text |PM -|h |clock-hour-of-am-pm (1-12) |number |12 -|K |hour-of-am-pm (0-11) |number |0 -|k |clock-hour-of-am-pm (1-24) |number |0 - -|H |hour-of-day (0-23) |number |0 -|m |minute-of-hour |number |30 -|s |second-of-minute |number |55 -|S |fraction-of-second |fraction |978 -|A |milli-of-day |number |1234 -|n |nano-of-second |number |987654321 -|N |nano-of-day |number |1234000000 - -|V |time-zone ID |zone-id |America/Los_Angeles; Z; -08:30 -|z |time-zone name |zone-name |Pacific Standard Time; PST -|O |localized zone-offset |offset-O |GMT+8; GMT+08:00; UTC-08:00; -|X |zone-offset 'Z' for zero |offset-X |Z; -08; -0830; -08:30; -083015; -08:30:15; -|x |zone-offset |offset-x |+0000; -08; -0830; -08:30; -083015; -08:30:15; -|Z |zone-offset |offset-Z |+0000; -0800; -08:00; - -|p |pad next |pad modifier |1 -|' |escape for text |delimiter -|'' |single quote |literal |' -|[ |optional section start -|] |optional section end -|# |reserved for future use -|{ |reserved for future use -|} |reserved for future use -|======= - -The count of pattern letters determines the format. - -Text:: The text style is determined based on the number of pattern letters -used. Less than 4 pattern letters will use the short form. Exactly 4 -pattern letters will use the full form. Exactly 5 pattern letters will use -the narrow form. Pattern letters `L`, `c`, and `q` specify the stand-alone -form of the text styles. - -Number:: If the count of letters is one, then the value is output using -the minimum number of digits and without padding. Otherwise, the count of -digits is used as the width of the output field, with the value -zero-padded as necessary. The following pattern letters have constraints -on the count of letters. Only one letter of `c` and `F` can be specified. -Up to two letters of `d`, `H`, `h`, `K`, `k`, `m`, and `s` can be -specified. 
Up to three letters of `D` can be specified. - -Number/Text:: If the count of pattern letters is 3 or greater, use the -Text rules above. Otherwise use the Number rules above. - -Fraction:: Outputs the nano-of-second field as a fraction-of-second. The -nano-of-second value has nine digits, thus the count of pattern letters is -from 1 to 9. If it is less than 9, then the nano-of-second value is -truncated, with only the most significant digits being output. - -Year:: The count of letters determines the minimum field width below which -padding is used. If the count of letters is two, then a reduced two digit -form is used. For printing, this outputs the rightmost two digits. For -parsing, this will parse using the base value of 2000, resulting in a year -within the range 2000 to 2099 inclusive. If the count of letters is less -than four (but not two), then the sign is only output for negative years -as per `SignStyle.NORMAL`. Otherwise, the sign is output if the pad width is -exceeded, as per `SignStyle.EXCEEDS_PAD`. - -ZoneId:: This outputs the time-zone ID, such as `Europe/Paris`. If the -count of letters is two, then the time-zone ID is output. Any other count -of letters throws `IllegalArgumentException`. - -Zone names:: This outputs the display name of the time-zone ID. If the -count of letters is one, two or three, then the short name is output. If -the count of letters is four, then the full name is output. Five or more -letters throws `IllegalArgumentException`. - -Offset X and x:: This formats the offset based on the number of pattern -letters. One letter outputs just the hour, such as `+01`, unless the -minute is non-zero in which case the minute is also output, such as -`+0130`. Two letters outputs the hour and minute, without a colon, such as -`+0130`. Three letters outputs the hour and minute, with a colon, such as -`+01:30`. Four letters outputs the hour and minute and optional second, -without a colon, such as `+013015`. Five letters outputs the hour and -minute and optional second, with a colon, such as `+01:30:15`. Six or -more letters throws `IllegalArgumentException`. Pattern letter `X` (upper -case) will output `Z` when the offset to be output would be zero, -whereas pattern letter `x` (lower case) will output `+00`, `+0000`, or -`+00:00`. - -Offset O:: This formats the localized offset based on the number of -pattern letters. One letter outputs the short form of the localized -offset, which is localized offset text, such as `GMT`, with hour without -leading zero, optional 2-digit minute and second if non-zero, and colon, -for example `GMT+8`. Four letters outputs the full form, which is -localized offset text, such as `GMT, with 2-digit hour and minute -field, optional second field if non-zero, and colon, for example -`GMT+08:00`. Any other count of letters throws -`IllegalArgumentException`. - -Offset Z:: This formats the offset based on the number of pattern letters. -One, two or three letters outputs the hour and minute, without a colon, -such as `+0130`. The output will be `+0000` when the offset is zero. -Four letters outputs the full form of localized offset, equivalent to -four letters of Offset-O. The output will be the corresponding localized -offset text if the offset is zero. Five letters outputs the hour, -minute, with optional second if non-zero, with colon. It outputs `Z` if -the offset is zero. Six or more letters throws IllegalArgumentException. 
- -Optional section:: The optional section markers work exactly like calling -`DateTimeFormatterBuilder.optionalStart()` and -`DateTimeFormatterBuilder.optionalEnd()`. - -Pad modifier:: Modifies the pattern that immediately follows to be padded -with spaces. The pad width is determined by the number of pattern letters. -This is the same as calling `DateTimeFormatterBuilder.padNext(int)`. - -For example, `ppH` outputs the hour-of-day padded on the left with spaces to a width of 2. - -Any unrecognized letter is an error. Any non-letter character, other than -`[`, `]`, `{`, `}`, `#` and the single quote will be output directly. -Despite this, it is recommended to use single quotes around all characters -that you want to output directly to ensure that future changes do not -break your application. - - -[[time-zones]] -==== Time zone in date range aggregations - -Dates can be converted from another time zone to UTC by specifying the -`time_zone` parameter. - -Time zones may either be specified as an ISO 8601 UTC offset (e.g. +01:00 or --08:00) or as one of the time zone ids from the TZ database. - -The `time_zone` parameter is also applied to rounding in date math expressions. -As an example, to round to the beginning of the day in the CET time zone, you -can do the following: - -[source,console,id=daterange-aggregation-timezone-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "range": { - "date_range": { - "field": "date", - "time_zone": "CET", - "ranges": [ - { "to": "2016/02/01" }, <1> - { "from": "2016/02/01", "to" : "now/d" }, <2> - { "from": "now/d" } - ] - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> This date will be converted to `2016-02-01T00:00:00.000+01:00`. -<2> `now/d` will be rounded to the beginning of the day in the CET time zone. - -==== Keyed Response - -Setting the `keyed` flag to `true` will associate a unique string key with each -bucket and return the ranges as a hash rather than an array: - -[source,console,id=daterange-aggregation-keyed-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "range": { - "date_range": { - "field": "date", - "format": "MM-yyy", - "ranges": [ - { "to": "now-10M/M" }, - { "from": "now-10M/M" } - ], - "keyed": true - } - } - } -} --------------------------------------------------- -// TEST[setup:sales s/now-10M\/M/10-2015/] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "range": { - "buckets": { - "*-10-2015": { - "to": 1.4436576E12, - "to_as_string": "10-2015", - "doc_count": 7 - }, - "10-2015-*": { - "from": 1.4436576E12, - "from_as_string": "10-2015", - "doc_count": 0 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -It is also possible to customize the key for each range: - -[source,console,id=daterange-aggregation-keyed-multiple-keys-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "range": { - "date_range": { - "field": "date", - "format": "MM-yyy", - "ranges": [ - { "from": "01-2015", "to": "03-2015", "key": "quarter_01" }, - { "from": "03-2015", "to": "06-2015", "key": "quarter_02" } - ], - "keyed": true - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "range": { - "buckets": { - "quarter_01": { - "from": 1.4200704E12, - "from_as_string": "01-2015", - "to": 1.425168E12, - "to_as_string": "03-2015", - "doc_count": 5 - }, - "quarter_02": { - "from": 1.425168E12, - "from_as_string": "03-2015", - "to": 1.4331168E12, - "to_as_string": "06-2015", - "doc_count": 2 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] diff --git a/docs/reference/aggregations/bucket/diversified-sampler-aggregation.asciidoc b/docs/reference/aggregations/bucket/diversified-sampler-aggregation.asciidoc deleted file mode 100644 index 0fcf8f9c46fc0..0000000000000 --- a/docs/reference/aggregations/bucket/diversified-sampler-aggregation.asciidoc +++ /dev/null @@ -1,208 +0,0 @@ -[[search-aggregations-bucket-diversified-sampler-aggregation]] -=== Diversified sampler aggregation -++++ -Diversified sampler -++++ - -Like the `sampler` aggregation this is a filtering aggregation used to limit any sub aggregations' processing to a sample of the top-scoring documents. -The `diversified_sampler` aggregation adds the ability to limit the number of matches that share a common value such as an "author". - -NOTE: Any good market researcher will tell you that when working with samples of data it is important -that the sample represents a healthy variety of opinions rather than being skewed by any single voice. -The same is true with aggregations and sampling with these diversify settings can offer a way to remove the bias in your content (an over-populated geography, -a large spike in a timeline or an over-active forum spammer). - - -.Example use cases: -* Tightening the focus of analytics to high-relevance matches rather than the potentially very long tail of low-quality matches -* Removing bias from analytics by ensuring fair representation of content from different sources -* Reducing the running cost of aggregations that can produce useful results using only samples e.g. `significant_terms` - -The `field` setting is used to provide values used for de-duplication and the `max_docs_per_value` setting controls the maximum -number of documents collected on any one shard which share a common value. The default setting for `max_docs_per_value` is 1. 
- -The aggregation will throw an error if the `field` produces multiple values for a single document (de-duplication using multi-valued fields is not supported due to efficiency concerns). - - -Example: - -We might want to see which tags are strongly associated with `#elasticsearch` on StackOverflow -forum posts but ignoring the effects of some prolific users with a tendency to misspell #Kibana as #Cabana. - -[source,console,id=diversified-sampler-aggregation-example] --------------------------------------------------- -POST /stackoverflow/_search?size=0 -{ - "query": { - "query_string": { - "query": "tags:elasticsearch" - } - }, - "aggs": { - "my_unbiased_sample": { - "diversified_sampler": { - "shard_size": 200, - "field": "author" - }, - "aggs": { - "keywords": { - "significant_terms": { - "field": "tags", - "exclude": [ "elasticsearch" ] - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:stackoverflow] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "my_unbiased_sample": { - "doc_count": 151, <1> - "keywords": { <2> - "doc_count": 151, - "bg_count": 650, - "buckets": [ - { - "key": "kibana", - "doc_count": 150, - "score": 2.213, - "bg_count": 200 - } - ] - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -// TESTRESPONSE[s/2.213/$body.aggregations.my_unbiased_sample.keywords.buckets.0.score/] - -<1> 151 documents were sampled in total. -<2> The results of the significant_terms aggregation are not skewed by any single author's quirks because we asked for a maximum of one post from any one author in our sample. - -==== Scripted example - -In this scenario we might want to diversify on a combination of field values. We can use a <> to -produce a hash of the multiple values in a tags field to ensure we don't have a sample that consists of the same -repeated combinations of tags. - -[source,console,id=diversified-sampler-aggregation-runtime-field-example] ----- -POST /stackoverflow/_search?size=0 -{ - "query": { - "query_string": { - "query": "tags:kibana" - } - }, - "runtime_mappings": { - "tags.hash": { - "type": "long", - "script": "emit(doc['tags'].hashCode())" - } - }, - "aggs": { - "my_unbiased_sample": { - "diversified_sampler": { - "shard_size": 200, - "max_docs_per_value": 3, - "field": "tags.hash" - }, - "aggs": { - "keywords": { - "significant_terms": { - "field": "tags", - "exclude": [ "kibana" ] - } - } - } - } - } -} ----- -// TEST[setup:stackoverflow] - -Response: - -[source,console-result] ----- -{ - ... - "aggregations": { - "my_unbiased_sample": { - "doc_count": 6, - "keywords": { - "doc_count": 6, - "bg_count": 650, - "buckets": [ - { - "key": "logstash", - "doc_count": 3, - "score": 2.213, - "bg_count": 50 - }, - { - "key": "elasticsearch", - "doc_count": 3, - "score": 1.34, - "bg_count": 200 - } - ] - } - } - } -} ----- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -// TESTRESPONSE[s/2.213/$body.aggregations.my_unbiased_sample.keywords.buckets.0.score/] -// TESTRESPONSE[s/1.34/$body.aggregations.my_unbiased_sample.keywords.buckets.1.score/] - -==== shard_size - -The `shard_size` parameter limits how many top-scoring documents are collected in the sample processed on each shard. -The default value is 100. 
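-
-As a rough sketch only, reusing the `stackoverflow` index from the examples
-above (the aggregation name is arbitrary), lowering `shard_size` reduces the
-per-shard work at the cost of a smaller, potentially less representative
-sample:
-
-[source,console]
---------------------------------------------------
-POST /stackoverflow/_search?size=0
-{
-  "query": {
-    "query_string": {
-      "query": "tags:elasticsearch"
-    }
-  },
-  "aggs": {
-    "my_small_sample": {
-      "diversified_sampler": {
-        "shard_size": 50, <1>
-        "field": "author"
-      },
-      "aggs": {
-        "keywords": {
-          "significant_terms": {
-            "field": "tags",
-            "exclude": [ "elasticsearch" ]
-          }
-        }
-      }
-    }
-  }
-}
---------------------------------------------------
-// TEST[skip:illustrative sketch only]
-
-<1> Collect at most 50 top-scoring documents per shard instead of the
-default 100.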
- -==== max_docs_per_value -The `max_docs_per_value` is an optional parameter and limits how many documents are permitted per choice of de-duplicating value. -The default setting is "1". - - -==== execution_hint - -The optional `execution_hint` setting can influence the management of the values used for de-duplication. -Each option will hold up to `shard_size` values in memory while performing de-duplication but the type of value held can be controlled as follows: - - - hold field values directly (`map`) - - hold ordinals of the field as determined by the Lucene index (`global_ordinals`) - - hold hashes of the field values - with potential for hash collisions (`bytes_hash`) - -The default setting is to use <> if this information is available from the Lucene index and reverting to `map` if not. -The `bytes_hash` setting may prove faster in some cases but introduces the possibility of false positives in de-duplication logic due to the possibility of hash collisions. -Please note that Elasticsearch will ignore the choice of execution hint if it is not applicable and that there is no backward compatibility guarantee on these hints. - -==== Limitations - -[[div-sampler-breadth-first-nested-agg]] -===== Cannot be nested under `breadth_first` aggregations -Being a quality-based filter the diversified_sampler aggregation needs access to the relevance score produced for each document. -It therefore cannot be nested under a `terms` aggregation which has the `collect_mode` switched from the default `depth_first` mode to `breadth_first` as this discards scores. -In this situation an error will be thrown. - -===== Limited de-dup logic. -The de-duplication logic applies only at a shard level so will not apply across shards. - -[[spec-syntax-geo-date-fields]] -===== No specialized syntax for geo/date fields -Currently the syntax for defining the diversifying values is defined by a choice of `field` or -`script` - there is no added syntactical sugar for expressing geo or date units such as "7d" (7 -days). This support may be added in a later release and users will currently have to create these -sorts of values using a script. diff --git a/docs/reference/aggregations/bucket/filter-aggregation.asciidoc b/docs/reference/aggregations/bucket/filter-aggregation.asciidoc deleted file mode 100644 index 06568923e2fba..0000000000000 --- a/docs/reference/aggregations/bucket/filter-aggregation.asciidoc +++ /dev/null @@ -1,204 +0,0 @@ -[[search-aggregations-bucket-filter-aggregation]] -=== Filter aggregation -++++ -Filter -++++ - -A single bucket aggregation that narrows the set of documents -to those that match a <>. - -Example: - -[source,console,id=filter-aggregation-example] ----- -POST /sales/_search?size=0&filter_path=aggregations -{ - "aggs": { - "avg_price": { "avg": { "field": "price" } }, - "t_shirts": { - "filter": { "term": { "type": "t-shirt" } }, - "aggs": { - "avg_price": { "avg": { "field": "price" } } - } - } - } -} ----- -// TEST[setup:sales] - -The previous example calculates the average price of all sales as well as -the average price of all T-shirt sales. - -Response: - -[source,console-result] ----- -{ - "aggregations": { - "avg_price": { "value": 140.71428571428572 }, - "t_shirts": { - "doc_count": 3, - "avg_price": { "value": 128.33333333333334 } - } - } -} ----- - -[[use-top-level-query-to-limit-all-aggs]] -==== Use a top-level `query` to limit all aggregations - -To limit the documents on which all aggregations in a search run, use a -top-level `query`. 
This is faster than a single `filter` aggregation with -sub-aggregations. - -For example, use this: - - -[source,console,id=filter-aggregation-top-good] ----- -POST /sales/_search?size=0&filter_path=aggregations -{ - "query": { "term": { "type": "t-shirt" } }, - "aggs": { - "avg_price": { "avg": { "field": "price" } } - } -} ----- -// TEST[setup:sales] - -//// -[source,console-result] ----- -{ - "aggregations": { - "avg_price": { "value": 128.33333333333334 } - } -} ----- -//// - -Instead of this: - -[source,console,id=filter-aggregation-top-bad] ----- -POST /sales/_search?size=0&filter_path=aggregations -{ - "aggs": { - "t_shirts": { - "filter": { "term": { "type": "t-shirt" } }, - "aggs": { - "avg_price": { "avg": { "field": "price" } } - } - } - } -} ----- -// TEST[setup:sales] - -//// -[source,console-result] ----- -{ - "aggregations": { - "t_shirts": { - "doc_count": 3, - "avg_price": { "value": 128.33333333333334 } - } - } -} ----- -//// - -[[use-filters-agg-for-multiple-filters]] -==== Use the `filters` aggregation for multiple filters - -To group documents using multiple filters, use the -<>. This -is faster than multiple `filter` aggregations. - -For example, use this: - -[source,console,id=filter-aggregation-many-good] ----- -POST /sales/_search?size=0&filter_path=aggregations -{ - "aggs": { - "f": { - "filters": { - "filters": { - "hats": { "term": { "type": "hat" } }, - "t_shirts": { "term": { "type": "t-shirt" } } - } - }, - "aggs": { - "avg_price": { "avg": { "field": "price" } } - } - } - } -} ----- -// TEST[setup:sales] - -//// -[source,console-result] ----- -{ - "aggregations": { - "f": { - "buckets": { - "hats": { - "doc_count": 3, - "avg_price": { "value": 150.0 } - }, - "t_shirts": { - "doc_count": 3, - "avg_price": { "value": 128.33333333333334 } - } - } - } - } -} ----- -//// - -Instead of this: - -[source,console,id=filter-aggregation-many-bad] ----- -POST /sales/_search?size=0&filter_path=aggregations -{ - "aggs": { - "hats": { - "filter": { "term": { "type": "hat" } }, - "aggs": { - "avg_price": { "avg": { "field": "price" } } - } - }, - "t_shirts": { - "filter": { "term": { "type": "t-shirt" } }, - "aggs": { - "avg_price": { "avg": { "field": "price" } } - } - } - } -} ----- -// TEST[setup:sales] - -//// -[source,console-result] ----- -{ - "aggregations": { - "hats": { - "doc_count": 3, - "avg_price": { "value": 150.0 } - }, - "t_shirts": { - "doc_count": 3, - "avg_price": { "value": 128.33333333333334 } - } - } -} ----- -//// diff --git a/docs/reference/aggregations/bucket/filters-aggregation.asciidoc b/docs/reference/aggregations/bucket/filters-aggregation.asciidoc deleted file mode 100644 index d677d28862178..0000000000000 --- a/docs/reference/aggregations/bucket/filters-aggregation.asciidoc +++ /dev/null @@ -1,254 +0,0 @@ -[[search-aggregations-bucket-filters-aggregation]] -=== Filters aggregation -++++ -Filters -++++ - -A multi-bucket aggregation where each bucket contains the documents -that match a <>. 
- -Example: - -[source,console,id=filters-aggregation-example] --------------------------------------------------- -PUT /logs/_bulk?refresh -{ "index" : { "_id" : 1 } } -{ "body" : "warning: page could not be rendered" } -{ "index" : { "_id" : 2 } } -{ "body" : "authentication error" } -{ "index" : { "_id" : 3 } } -{ "body" : "warning: connection timed out" } - -GET logs/_search -{ - "size": 0, - "aggs" : { - "messages" : { - "filters" : { - "filters" : { - "errors" : { "match" : { "body" : "error" }}, - "warnings" : { "match" : { "body" : "warning" }} - } - } - } - } -} --------------------------------------------------- - -In the above example, we analyze log messages. The aggregation will build two -collection (buckets) of log messages - one for all those containing an error, -and another for all those containing a warning. - -Response: - -[source,console-result] --------------------------------------------------- -{ - "took": 9, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "messages": { - "buckets": { - "errors": { - "doc_count": 1 - }, - "warnings": { - "doc_count": 2 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 9/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -[[anonymous-filters]] -==== Anonymous filters - -The filters field can also be provided as an array of filters, as in the -following request: - -[source,console,id=filters-aggregation-anonymous-example] --------------------------------------------------- -GET logs/_search -{ - "size": 0, - "aggs" : { - "messages" : { - "filters" : { - "filters" : [ - { "match" : { "body" : "error" }}, - { "match" : { "body" : "warning" }} - ] - } - } - } -} --------------------------------------------------- -// TEST[continued] - -The filtered buckets are returned in the same order as provided in the -request. The response for this example would be: - -[source,console-result] --------------------------------------------------- -{ - "took": 4, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "messages": { - "buckets": [ - { - "doc_count": 1 - }, - { - "doc_count": 2 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 4/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -[[other-bucket]] -==== `Other` Bucket - -The `other_bucket` parameter can be set to add a bucket to the response which will contain all documents that do -not match any of the given filters. The value of this parameter can be as follows: - -`false`:: Does not compute the `other` bucket -`true`:: Returns the `other` bucket either in a bucket (named `_other_` by default) if named filters are being used, - or as the last bucket if anonymous filters are being used - -The `other_bucket_key` parameter can be used to set the key for the `other` bucket to a value other than the default `_other_`. Setting -this parameter will implicitly set the `other_bucket` parameter to `true`. - -The following snippet shows a response where the `other` bucket is requested to be named `other_messages`. 
- -[source,console,id=filters-aggregation-other-bucket-example] --------------------------------------------------- -PUT logs/_doc/4?refresh -{ - "body": "info: user Bob logged out" -} - -GET logs/_search -{ - "size": 0, - "aggs" : { - "messages" : { - "filters" : { - "other_bucket_key": "other_messages", - "filters" : { - "errors" : { "match" : { "body" : "error" }}, - "warnings" : { "match" : { "body" : "warning" }} - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -The response would be something like the following: - -[source,console-result] --------------------------------------------------- -{ - "took": 3, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "messages": { - "buckets": { - "errors": { - "doc_count": 1 - }, - "warnings": { - "doc_count": 2 - }, - "other_messages": { - "doc_count": 1 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 3/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -[[non-keyed-response]] -==== Non-keyed Response - -By default, the named filters aggregation returns the buckets as an object. But in some sorting cases, such as -<>, the JSON doesn't guarantee the order of elements -in the object. You can use the `keyed` parameter to specify the buckets as an array of objects. The value of this -parameter can be as follows: - -`true`:: (Default) Returns the buckets as an object -`false`:: Returns the buckets as an array of objects - -NOTE: This parameter is ignored by <>. - -Example: - -[source,console,id=filters-aggregation-sortable-example] ----- -POST /sales/_search?size=0&filter_path=aggregations -{ - "aggs": { - "the_filter": { - "filters": { - "keyed": false, - "filters": { - "t-shirt": { "term": { "type": "t-shirt" } }, - "hat": { "term": { "type": "hat" } } - } - }, - "aggs": { - "avg_price": { "avg": { "field": "price" } }, - "sort_by_avg_price": { - "bucket_sort": { "sort": { "avg_price": "asc" } } - } - } - } - } -} ----- -// TEST[setup:sales] - -Response: - -[source,console-result] ----- -{ - "aggregations": { - "the_filter": { - "buckets": [ - { - "key": "t-shirt", - "doc_count": 3, - "avg_price": { "value": 128.33333333333334 } - }, - { - "key": "hat", - "doc_count": 3, - "avg_price": { "value": 150.0 } - } - ] - } - } -} ----- diff --git a/docs/reference/aggregations/bucket/frequent-item-sets-aggregation.asciidoc b/docs/reference/aggregations/bucket/frequent-item-sets-aggregation.asciidoc deleted file mode 100644 index 01dacd0d6ccd5..0000000000000 --- a/docs/reference/aggregations/bucket/frequent-item-sets-aggregation.asciidoc +++ /dev/null @@ -1,405 +0,0 @@ -[[search-aggregations-bucket-frequent-item-sets-aggregation]] -=== Frequent item sets aggregation -++++ -Frequent item sets -++++ - -A bucket aggregation which finds frequent item sets. It is a form of association -rules mining that identifies items that often occur together. Items that are -frequently purchased together or log events that tend to co-occur are examples -of frequent item sets. Finding frequent item sets helps to discover -relationships between different data points (items). - -The aggregation reports closed item sets. A frequent item set is called closed -if no superset exists with the same ratio of documents (also known as its -<>). For example, we have the two -following candidates for a frequent item set, which have the same support value: -1. 
`apple, orange, banana` -2. `apple, orange, banana, tomato`. -Only the second item set (`apple, orange, banana, tomato`) is returned, and the -first set – which is a subset of the second one – is skipped. Both item sets -might be returned if their support values are different. - -The runtime of the aggregation depends on the data and the provided parameters. -It might take a significant time for the aggregation to complete. For this -reason, it is recommended to use <> to run your -requests asynchronously. - - -==== Syntax - -A `frequent_item_sets` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -"frequent_item_sets": { - "minimum_set_size": 3, - "fields": [ - {"field": "my_field_1"}, - {"field": "my_field_2"} - ] -} --------------------------------------------------- -// NOTCONSOLE - -.`frequent_item_sets` Parameters -|=== -|Parameter Name |Description |Required |Default Value -|`fields` |(array) Fields to analyze. | Required | -|`minimum_set_size` | (integer) The <> of one item set. | Optional | `1` -|`minimum_support` | (integer) The <> of one item set. | Optional | `0.1` -|`size` | (integer) The number of top item sets to return. | Optional | `10` -|`filter` | (object) Query that filters documents from the analysis | Optional | `match_all` -|=== - - -[discrete] -[[frequent-item-sets-fields]] -==== Fields - -Supported field types for the analyzed fields are keyword, numeric, ip, date, -and arrays of these types. You can also add runtime fields to your analyzed -fields. - -If the combined cardinality of the analyzed fields are high, the aggregation -might require a significant amount of system resources. - -You can filter the values for each field by using the `include` and `exclude` -parameters. The parameters can be regular expression strings or arrays of -strings of exact terms. The filtered values are removed from the analysis and -therefore reduce the runtime. If both `include` and `exclude` are defined, -`exclude` takes precedence; it means `include` is evaluated first and then -`exclude`. - -[discrete] -[[frequent-item-sets-minimum-set-size]] -==== Minimum set size - -The minimum set size is the minimum number of items the set needs to contain. A -value of 1 returns the frequency of single items. Only item sets that contain at -least the number of `minimum_set_size` items are returned. For example, the item -set `orange, banana, apple` is returned only if the minimum set size is 3 or -lower. - -[discrete] -[[frequent-item-sets-minimum-support]] -==== Minimum support - -The minimum support value is the ratio of documents that an item set must exist -in to be considered "frequent". In particular, it is a normalized value between -0 and 1. It is calculated by dividing the number of documents containing the -item set by the total number of documents. - -For example, if a given item set is contained by five documents and the total -number of documents is 20, then the support of the item set is 5/20 = 0.25. -Therefore, this set is returned only if the minimum support is 0.25 or lower. -As a higher minimum support prunes more items, the calculation is less resource -intensive. The `minimum_support` parameter has an effect on the required memory -and the runtime of the aggregation. - - -[discrete] -[[frequent-item-sets-size]] -==== Size - -This parameter defines the maximum number of item sets to return. The result -contains top-k item sets; the item sets with the highest support values. 
This -parameter has a significant effect on the required memory and the runtime of the -aggregation. - - -[discrete] -[[frequent-item-sets-filter]] -==== Filter - -A query to filter documents to use as part of the analysis. Documents that -don't match the filter are ignored when generating the item sets, however still -count when calculating the support of an item set. - -Use the filter if you want to narrow the item set analysis to fields of interest. -Use a top-level query to filter the data set. - - -[discrete] -[[frequent-item-sets-example]] -==== Examples - -In the following examples, we use the e-commerce {kib} sample data set. - - -[discrete] -==== Aggregation with two analyzed fields and an `exclude` parameter - -In the first example, the goal is to find out based on transaction data (1.) -from what product categories the customers purchase products frequently together -and (2.) from which cities they make those purchases. We want to exclude results -where location information is not available (where the city name is `other`). -Finally, we are interested in sets with three or more items, and want to see the -first three frequent item sets with the highest support. - -Note that we use the <> endpoint in this first -example. - -[source,console] -------------------------------------------------- -POST /kibana_sample_data_ecommerce/_async_search -{ - "size":0, - "aggs":{ - "my_agg":{ - "frequent_item_sets":{ - "minimum_set_size":3, - "fields":[ - { - "field":"category.keyword" - }, - { - "field":"geoip.city_name", - "exclude":"other" - } - ], - "size":3 - } - } - } -} -------------------------------------------------- -// TEST[skip:setup kibana sample data] - -The response of the API call above contains an identifier (`id`) of the async -search request. You can use the identifier to retrieve the search results: - -[source,console] -------------------------------------------------- -GET /_async_search/ -------------------------------------------------- -// TEST[skip:setup kibana sample data] - -The API returns a response similar to the following one: - -[source,console-result] -------------------------------------------------- -(...) -"aggregations" : { - "my_agg" : { - "buckets" : [ <1> - { - "key" : { <2> - "category.keyword" : [ - "Women's Clothing", - "Women's Shoes" - ], - "geoip.city_name" : [ - "New York" - ] - }, - "doc_count" : 217, <3> - "support" : 0.04641711229946524 <4> - }, - { - "key" : { - "category.keyword" : [ - "Women's Clothing", - "Women's Accessories" - ], - "geoip.city_name" : [ - "New York" - ] - }, - "doc_count" : 135, - "support" : 0.028877005347593583 - }, - { - "key" : { - "category.keyword" : [ - "Men's Clothing", - "Men's Shoes" - ], - "geoip.city_name" : [ - "Cairo" - ] - }, - "doc_count" : 123, - "support" : 0.026310160427807486 - } - ], - (...) - } -} -------------------------------------------------- -// TEST[skip:setup kibana sample data] - -<1> The array of returned item sets. -<2> The `key` object contains one item set. In this case, it consists of two -values of the `category.keyword` field and one value of the `geoip.city_name`. -<3> The number of documents that contain the item set. -<4> The support value of the item set. It is calculated by dividing the number -of documents containing the item set by the total number of documents. 
- -The response shows that the categories customers purchase from most frequently -together are `Women's Clothing` and `Women's Shoes` and customers from New York -tend to buy items from these categories frequently together. In other words, -customers who buy products labelled `Women's Clothing` more likely buy products -also from the `Women's Shoes` category and customers from New York most likely -buy products from these categories together. The item set with the second -highest support is `Women's Clothing` and `Women's Accessories` with customers -mostly from New York. Finally, the item set with the third highest support is -`Men's Clothing` and `Men's Shoes` with customers mostly from Cairo. - - -[discrete] -==== Aggregation with two analyzed fields and a filter - -We take the first example, but want to narrow the item sets to places in Europe. -For that, we add a filter, and this time, we don't use the `exclude` parameter: - -[source,console] -------------------------------------------------- -POST /kibana_sample_data_ecommerce/_async_search -{ - "size": 0, - "aggs": { - "my_agg": { - "frequent_item_sets": { - "minimum_set_size": 3, - "fields": [ - { "field": "category.keyword" }, - { "field": "geoip.city_name" } - ], - "size": 3, - "filter": { - "term": { - "geoip.continent_name": "Europe" - } - } - } - } - } -} -------------------------------------------------- -// TEST[skip:setup kibana sample data] - -The result will only show item sets that created from documents matching the -filter, namely purchases in Europe. Using `filter`, the calculated `support` -still takes all purchases into acount. That's different than specifying a query -at the top-level, in which case `support` gets calculated only from purchases in -Europe. - - -[discrete] -==== Analyzing numeric values by using a runtime field - -The frequent items aggregation enables you to bucket numeric values by using -<>. The next example demonstrates how to use a script to -add a runtime field to your documents called `price_range`, which is -calculated from the taxful total price of the individual transactions. The -runtime field then can be used in the frequent items aggregation as a field to -analyze. - - -[source,console] -------------------------------------------------- -GET kibana_sample_data_ecommerce/_search -{ - "runtime_mappings": { - "price_range": { - "type": "keyword", - "script": { - "source": """ - def bucket_start = (long) Math.floor(doc['taxful_total_price'].value / 50) * 50; - def bucket_end = bucket_start + 50; - emit(bucket_start.toString() + "-" + bucket_end.toString()); - """ - } - } - }, - "size": 0, - "aggs": { - "my_agg": { - "frequent_item_sets": { - "minimum_set_size": 4, - "fields": [ - { - "field": "category.keyword" - }, - { - "field": "price_range" - }, - { - "field": "geoip.city_name" - } - ], - "size": 3 - } - } - } -} -------------------------------------------------- -// TEST[skip:setup kibana sample data] - -The API returns a response similar to the following one: - -[source,console-result] -------------------------------------------------- -(...) 
-"aggregations" : { - "my_agg" : { - "buckets" : [ - { - "key" : { - "category.keyword" : [ - "Women's Clothing", - "Women's Shoes" - ], - "price_range" : [ - "50-100" - ], - "geoip.city_name" : [ - "New York" - ] - }, - "doc_count" : 100, - "support" : 0.0213903743315508 - }, - { - "key" : { - "category.keyword" : [ - "Women's Clothing", - "Women's Shoes" - ], - "price_range" : [ - "50-100" - ], - "geoip.city_name" : [ - "Dubai" - ] - }, - "doc_count" : 59, - "support" : 0.012620320855614974 - }, - { - "key" : { - "category.keyword" : [ - "Men's Clothing", - "Men's Shoes" - ], - "price_range" : [ - "50-100" - ], - "geoip.city_name" : [ - "Marrakesh" - ] - }, - "doc_count" : 53, - "support" : 0.011336898395721925 - } - ], - (...) - } - } -------------------------------------------------- -// TEST[skip:setup kibana sample data] - -The response shows the categories that customers purchase from most frequently -together, the location of the customers who tend to buy items from these -categories, and the most frequent price ranges of these purchases. diff --git a/docs/reference/aggregations/bucket/geodistance-aggregation.asciidoc b/docs/reference/aggregations/bucket/geodistance-aggregation.asciidoc deleted file mode 100644 index 5250b39a95045..0000000000000 --- a/docs/reference/aggregations/bucket/geodistance-aggregation.asciidoc +++ /dev/null @@ -1,253 +0,0 @@ -[[search-aggregations-bucket-geodistance-aggregation]] -=== Geo-distance aggregation -++++ -Geo-distance -++++ - -A multi-bucket aggregation that works on `geo_point` fields and conceptually works very similar to the <> aggregation. The user can define a point of origin and a set of distance range buckets. The aggregation evaluates the distance of each document value from the origin point and determines the buckets it belongs to based on the ranges (a document belongs to a bucket if the distance between the document and the origin falls within the distance range of the bucket). - -[source,console,id=geodistance-aggregation-example] --------------------------------------------------- -PUT /museums -{ - "mappings": { - "properties": { - "location": { - "type": "geo_point" - } - } - } -} - -POST /museums/_bulk?refresh -{"index":{"_id":1}} -{"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} -{"index":{"_id":2}} -{"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} -{"index":{"_id":3}} -{"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} -{"index":{"_id":4}} -{"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} -{"index":{"_id":5}} -{"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} -{"index":{"_id":6}} -{"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} - -POST /museums/_search?size=0 -{ - "aggs": { - "rings_around_amsterdam": { - "geo_distance": { - "field": "location", - "origin": "POINT (4.894 52.3760)", - "ranges": [ - { "to": 100000 }, - { "from": 100000, "to": 300000 }, - { "from": 300000 } - ] - } - } - } -} --------------------------------------------------- - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "rings_around_amsterdam": { - "buckets": [ - { - "key": "*-100000.0", - "from": 0.0, - "to": 100000.0, - "doc_count": 3 - }, - { - "key": "100000.0-300000.0", - "from": 100000.0, - "to": 300000.0, - "doc_count": 1 - }, - { - "key": "300000.0-*", - "from": 300000.0, - "doc_count": 2 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -The specified field must be of type `geo_point` (which can only be set explicitly in the mappings). And it can also hold an array of `geo_point` fields, in which case all will be taken into account during aggregation. The origin point can accept all formats supported by the <>: - -* Object format: `{ "lat" : 52.3760, "lon" : 4.894 }` - this is the safest format as it is the most explicit about the `lat` & `lon` values -* String format: `"52.3760, 4.894"` - where the first number is the `lat` and the second is the `lon` -* Array format: `[4.894, 52.3760]` - which is based on the GeoJSON standard where the first number is the `lon` and the second one is the `lat` - -By default, the distance unit is `m` (meters) but it can also accept: `mi` (miles), `in` (inches), `yd` (yards), `km` (kilometers), `cm` (centimeters), `mm` (millimeters). - -[source,console] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggs": { - "rings": { - "geo_distance": { - "field": "location", - "origin": "POINT (4.894 52.3760)", - "unit": "km", <1> - "ranges": [ - { "to": 100 }, - { "from": 100, "to": 300 }, - { "from": 300 } - ] - } - } - } -} --------------------------------------------------- -// TEST[continued] - -<1> The distances will be computed in kilometers - -There are two distance calculation modes: `arc` (the default), and `plane`. The `arc` calculation is the most accurate. The `plane` is the fastest but least accurate. Consider using `plane` when your search context is "narrow", and spans smaller geographical areas (~5km). `plane` will return higher error margins for searches across very large areas (e.g. cross continent search). The distance calculation type can be set using the `distance_type` parameter: - -[source,console] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggs": { - "rings": { - "geo_distance": { - "field": "location", - "origin": "POINT (4.894 52.3760)", - "unit": "km", - "distance_type": "plane", - "ranges": [ - { "to": 100 }, - { "from": 100, "to": 300 }, - { "from": 300 } - ] - } - } - } -} --------------------------------------------------- -// TEST[continued] - -==== Keyed Response - -Setting the `keyed` flag to `true` will associate a unique string key with each bucket and return the ranges as a hash rather than an array: - -[source,console] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggs": { - "rings_around_amsterdam": { - "geo_distance": { - "field": "location", - "origin": "POINT (4.894 52.3760)", - "ranges": [ - { "to": 100000 }, - { "from": 100000, "to": 300000 }, - { "from": 300000 } - ], - "keyed": true - } - } - } -} --------------------------------------------------- -// TEST[continued] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "rings_around_amsterdam": { - "buckets": { - "*-100000.0": { - "from": 0.0, - "to": 100000.0, - "doc_count": 3 - }, - "100000.0-300000.0": { - "from": 100000.0, - "to": 300000.0, - "doc_count": 1 - }, - "300000.0-*": { - "from": 300000.0, - "doc_count": 2 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -It is also possible to customize the key for each range: - -[source,console] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggs": { - "rings_around_amsterdam": { - "geo_distance": { - "field": "location", - "origin": "POINT (4.894 52.3760)", - "ranges": [ - { "to": 100000, "key": "first_ring" }, - { "from": 100000, "to": 300000, "key": "second_ring" }, - { "from": 300000, "key": "third_ring" } - ], - "keyed": true - } - } - } -} --------------------------------------------------- -// TEST[continued] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "rings_around_amsterdam": { - "buckets": { - "first_ring": { - "from": 0.0, - "to": 100000.0, - "doc_count": 3 - }, - "second_ring": { - "from": 100000.0, - "to": 300000.0, - "doc_count": 1 - }, - "third_ring": { - "from": 300000.0, - "doc_count": 2 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - diff --git a/docs/reference/aggregations/bucket/geohashgrid-aggregation.asciidoc b/docs/reference/aggregations/bucket/geohashgrid-aggregation.asciidoc deleted file mode 100644 index 39a36cfdf9f26..0000000000000 --- a/docs/reference/aggregations/bucket/geohashgrid-aggregation.asciidoc +++ /dev/null @@ -1,318 +0,0 @@ -[[search-aggregations-bucket-geohashgrid-aggregation]] -=== Geohash grid aggregation -++++ -Geohash grid -++++ - -A multi-bucket aggregation that groups <> and -<> values into buckets that represent a grid. -The resulting grid can be sparse and only contains cells that have matching data. Each cell is labeled using a {wikipedia}/Geohash[geohash] which is of user-definable precision. - -* High precision geohashes have a long string length and represent cells that cover only a small area. -* Low precision geohashes have a short string length and represent cells that each cover a large area. - -Geohashes used in this aggregation can have a choice of precision between 1 and 12. - -WARNING: The highest-precision geohash of length 12 produces cells that cover less than a square metre of land and so high-precision requests can be very costly in terms of RAM and result sizes. -Please see the example below on how to first filter the aggregation to a smaller geographic area before requesting high-levels of detail. - -You can only use `geohash_grid` to aggregate an explicitly mapped `geo_point` or -`geo_shape` field. If the `geo_point` field contains an array, `geohash_grid` -aggregates all the array values. 
- - -==== Simple low-precision request - -[source,console,id=geohashgrid-aggregation-low-precision-example] --------------------------------------------------- -PUT /museums -{ - "mappings": { - "properties": { - "location": { - "type": "geo_point" - } - } - } -} - -POST /museums/_bulk?refresh -{"index":{"_id":1}} -{"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} -{"index":{"_id":2}} -{"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} -{"index":{"_id":3}} -{"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} -{"index":{"_id":4}} -{"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} -{"index":{"_id":5}} -{"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} -{"index":{"_id":6}} -{"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} - -POST /museums/_search?size=0 -{ - "aggregations": { - "large-grid": { - "geohash_grid": { - "field": "location", - "precision": 3 - } - } - } -} --------------------------------------------------- - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "large-grid": { - "buckets": [ - { - "key": "u17", - "doc_count": 3 - }, - { - "key": "u09", - "doc_count": 2 - }, - { - "key": "u15", - "doc_count": 1 - } - ] - } -} -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -==== High-precision requests - -When requesting detailed buckets (typically for displaying a "zoomed in" map) a filter like <> should be applied to narrow the subject area otherwise potentially millions of buckets will be created and returned. - -[source,console,id=geohashgrid-aggregation-high-precision-example] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggregations": { - "zoomed-in": { - "filter": { - "geo_bounding_box": { - "location": { - "top_left": "POINT (4.9 52.4)", - "bottom_right": "POINT (5.0 52.3)" - } - } - }, - "aggregations": { - "zoom1": { - "geohash_grid": { - "field": "location", - "precision": 8 - } - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -The geohashes returned by the `geohash_grid` aggregation can be also used for zooming in. To zoom into the -first geohash `u17` returned in the previous example, it should be specified as both `top_left` and `bottom_right` corner: - -[source,console] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggregations": { - "zoomed-in": { - "filter": { - "geo_bounding_box": { - "location": { - "top_left": "u17", - "bottom_right": "u17" - } - } - }, - "aggregations": { - "zoom1": { - "geohash_grid": { - "field": "location", - "precision": 8 - } - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "zoomed-in": { - "doc_count": 3, - "zoom1": { - "buckets": [ - { - "key": "u173zy3j", - "doc_count": 1 - }, - { - "key": "u173zvfz", - "doc_count": 1 - }, - { - "key": "u173zt90", - "doc_count": 1 - } - ] - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -For "zooming in" on the system that don't support geohashes, the bucket keys should be translated into bounding boxes using -one of available geohash libraries. For example, for javascript the https://github.com/sunng87/node-geohash[node-geohash] library -can be used: - -[source,js] --------------------------------------------------- -var geohash = require('ngeohash'); - -// bbox will contain [ 52.03125, 4.21875, 53.4375, 5.625 ] -// [ minlat, minlon, maxlat, maxlon] -var bbox = geohash.decode_bbox('u17'); --------------------------------------------------- -// NOTCONSOLE - -==== Requests with additional bounding box filtering - -The `geohash_grid` aggregation supports an optional `bounds` parameter -that restricts the cells considered to those that intersects the -bounds provided. The `bounds` parameter accepts the bounding box in -all the same <> of the -bounds specified in the Geo Bounding Box Query. This bounding box can be used with or -without an additional `geo_bounding_box` query filtering the points prior to aggregating. -It is an independent bounding box that can intersect with, be equal to, or be disjoint -to any additional `geo_bounding_box` queries defined in the context of the aggregation. - -[source,console,id=geohashgrid-aggregation-with-bounds] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggregations": { - "tiles-in-bounds": { - "geohash_grid": { - "field": "location", - "precision": 8, - "bounds": { - "top_left": "POINT (4.21875 53.4375)", - "bottom_right": "POINT (5.625 52.03125)" - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "tiles-in-bounds": { - "buckets": [ - { - "key": "u173zy3j", - "doc_count": 1 - }, - { - "key": "u173zvfz", - "doc_count": 1 - }, - { - "key": "u173zt90", - "doc_count": 1 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -==== Cell dimensions at the equator -The table below shows the metric dimensions for cells covered by various string lengths of geohash. -Cell dimensions vary with latitude and so the table is for the worst-case scenario at the equator. - -[horizontal] -*GeoHash length*:: *Area width x height* -1:: 5,009.4km x 4,992.6km -2:: 1,252.3km x 624.1km -3:: 156.5km x 156km -4:: 39.1km x 19.5km -5:: 4.9km x 4.9km -6:: 1.2km x 609.4m -7:: 152.9m x 152.4m -8:: 38.2m x 19m -9:: 4.8m x 4.8m -10:: 1.2m x 59.5cm -11:: 14.9cm x 14.9cm -12:: 3.7cm x 1.9cm - - -[discrete] -[role="xpack"] -==== Aggregating `geo_shape` fields - -Aggregating on <> fields works just as it does for points, except that a single -shape can be counted for in multiple tiles. A shape will contribute to the count of matching values -if any part of its shape intersects with that tile. Below is an image that demonstrates this: - - -image:images/spatial/geoshape_grid.png[] - -==== Options - -[horizontal] -field:: Mandatory. 
Field containing indexed geo-point or geo-shape - values. Must be explicitly mapped as a <> - or a <> field. If the field contains an - array, `geohash_grid` aggregates all array values. - -precision:: Optional. The string length of the geohashes used to define - cells/buckets in the results. Defaults to 5. - The precision can either be defined in terms of the integer - precision levels mentioned above. Values outside of [1,12] will - be rejected. - Alternatively, the precision level can be approximated from a - distance measure like "1km", "10m". The precision level is - calculate such that cells will not exceed the specified - size (diagonal) of the required precision. When this would lead - to precision levels higher than the supported 12 levels, - (e.g. for distances <5.6cm) the value is rejected. - -bounds:: Optional. The bounding box to filter the points in the bucket. - -size:: Optional. The maximum number of geohash buckets to return - (defaults to 10,000). When results are trimmed, buckets are - prioritised based on the volumes of documents they contain. - -shard_size:: Optional. To allow for more accurate counting of the top cells - returned in the final result the aggregation defaults to - returning `max(10,(size x number-of-shards))` buckets from each - shard. If this heuristic is undesirable, the number considered - from each shard can be over-ridden using this parameter. diff --git a/docs/reference/aggregations/bucket/geohexgrid-aggregation.asciidoc b/docs/reference/aggregations/bucket/geohexgrid-aggregation.asciidoc deleted file mode 100644 index ce0fea4f4ea9f..0000000000000 --- a/docs/reference/aggregations/bucket/geohexgrid-aggregation.asciidoc +++ /dev/null @@ -1,274 +0,0 @@ -[role="xpack"] -[[search-aggregations-bucket-geohexgrid-aggregation]] -=== Geohex grid aggregation -++++ -Geohex grid -++++ - -A multi-bucket aggregation that groups <> and -<> values into buckets that represent a grid. -The resulting grid can be sparse and only -contains cells that have matching data. Each cell corresponds to a -https://h3geo.org/docs/core-library/h3Indexing#h3-cell-indexp[H3 cell index] and is -labeled using the https://h3geo.org/docs/core-library/h3Indexing#h3index-representation[H3Index representation]. - -See https://h3geo.org/docs/core-library/restable[the table of cell areas for H3 -resolutions] on how precision (zoom) correlates to size on the ground. -Precision for this aggregation can be between 0 and 15, inclusive. - -WARNING: High-precision requests can be very expensive in terms of RAM and -result sizes. For example, the highest-precision geohex with a precision of 15 -produces cells that cover less than one square meter. We recommend you use a -filter to limit high-precision requests to a smaller geographic area. For an example, -refer to <>. 
- -[[geohexgrid-low-precision]] -==== Simple low-precision request - -[source,console,id=geohexgrid-aggregation-example] --------------------------------------------------- -PUT /museums -{ - "mappings": { - "properties": { - "location": { - "type": "geo_point" - } - } - } -} - -POST /museums/_bulk?refresh -{"index":{"_id":1}} -{"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} -{"index":{"_id":2}} -{"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} -{"index":{"_id":3}} -{"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} -{"index":{"_id":4}} -{"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} -{"index":{"_id":5}} -{"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} -{"index":{"_id":6}} -{"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} - -POST /museums/_search?size=0 -{ - "aggregations": { - "large-grid": { - "geohex_grid": { - "field": "location", - "precision": 4 - } - } - } -} --------------------------------------------------- - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "large-grid": { - "buckets": [ - { - "key": "841969dffffffff", - "doc_count": 3 - }, - { - "key": "841fb47ffffffff", - "doc_count": 2 - }, - { - "key": "841fa4dffffffff", - "doc_count": 1 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -[[geohexgrid-high-precision]] -==== High-precision requests - -When requesting detailed buckets (typically for displaying a "zoomed in" map), -a filter like <> should be -applied to narrow the subject area. Otherwise, potentially millions of buckets -will be created and returned. - -[source,console,id=geohexgrid-high-precision-ex] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggregations": { - "zoomed-in": { - "filter": { - "geo_bounding_box": { - "location": { - "top_left": "POINT (4.9 52.4)", - "bottom_right": "POINT (5.0 52.3)" - } - } - }, - "aggregations": { - "zoom1": { - "geohex_grid": { - "field": "location", - "precision": 12 - } - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "zoomed-in": { - "doc_count": 3, - "zoom1": { - "buckets": [ - { - "key": "8c1969c9b2617ff", - "doc_count": 1 - }, - { - "key": "8c1969526d753ff", - "doc_count": 1 - }, - { - "key": "8c1969526d26dff", - "doc_count": 1 - } - ] - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -[[geohexgrid-addtl-bounding-box-filtering]] -==== Requests with additional bounding box filtering - -The `geohex_grid` aggregation supports an optional `bounds` parameter -that restricts the cells considered to those that intersect the -provided bounds. The `bounds` parameter accepts the same -<> -as the geo-bounding box query. This bounding box can be used with or -without an additional `geo_bounding_box` query for filtering the points prior to aggregating. -It is an independent bounding box that can intersect with, be equal to, or be disjoint -to any additional `geo_bounding_box` queries defined in the context of the aggregation. 
- -[source,console,id=geohexgrid-aggregation-with-bounds] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggregations": { - "tiles-in-bounds": { - "geohex_grid": { - "field": "location", - "precision": 12, - "bounds": { - "top_left": "POINT (4.9 52.4)", - "bottom_right": "POINT (5.0 52.3)" - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "tiles-in-bounds": { - "buckets": [ - { - "key": "8c1969c9b2617ff", - "doc_count": 1 - }, - { - "key": "8c1969526d753ff", - "doc_count": 1 - }, - { - "key": "8c1969526d26dff", - "doc_count": 1 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -[discrete] -[role="xpack"] -[[geohexgrid-aggregating-geo-shape]] -==== Aggregating `geo_shape` fields - -Aggregating on <> fields works almost as it does for points. There are two key differences: - -* When aggregating over `geo_point` data, points are considered within a hexagonal tile if they lie -within the edges defined by great circles. In other words the calculation is done using spherical coordinates. -However, when aggregating over `geo_shape` data, the shapes are considered within a hexagon if they lie -within the edges defined as straight lines on an equirectangular projection. -The reason is that Elasticsearch and Lucene treat edges using the equirectangular projection at index and search time. -In order to ensure that search results and aggregation results are aligned, we therefore also use equirectangular -projection in aggregations. -For most data, the difference is subtle or not noticed. -However, for low zoom levels (low precision), especially far from the equator, this can be noticeable. -For example, if the same point data is indexed as `geo_point` and `geo_shape`, it is possible to get -different results when aggregating at lower resolutions. -* As is the case with <>, -a single shape can be counted for in multiple tiles. A shape will contribute to the count of matching values -if any part of its shape intersects with that tile. Below is an image that demonstrates this: - - -image:images/spatial/geoshape_hexgrid.png[] - -==== Options - -[horizontal] -field:: -(Required, string) Field containing indexed geo-point or geo-shape values. -Must be explicitly mapped as a <> or a <> field. -If the field contains an array, `geohex_grid` aggregates all array values. - -precision:: -(Optional, integer) Integer zoom of the key used to define cells/buckets in -the results. Defaults to `6`. Values outside of [`0`,`15`] will be rejected. - -bounds:: -(Optional, object) Bounding box used to filter the geo-points or geo-shapes in each bucket. -Accepts the same bounding box formats as the -<>. - -size:: -(Optional, integer) Maximum number of buckets to return. Defaults to 10,000. -When results are trimmed, buckets are prioritized based on the volume of -documents they contain. - -shard_size:: -(Optional, integer) Number of buckets returned from each shard. Defaults to -`max(10,(size x number-of-shards))` to allow for a more accurate count of the -top cells in the final result. Since each shard could have a different top result order, -using a larger number here reduces the risk of inaccurate counts, but incurs a performance cost. 
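As a quick illustration of the `size` and `shard_size` options just described (a sketch, not taken from the original examples; the aggregation name `top-two-cells` is illustrative), the following request reuses the `museums` index from above, asks for at most two buckets, and widens the per-shard candidate set to reduce the risk of inaccurate counts:

[source,console]
--------------------------------------------------
POST /museums/_search?size=0
{
  "aggregations": {
    "top-two-cells": {
      "geohex_grid": {
        "field": "location",
        "precision": 4,
        "size": 2,
        "shard_size": 100
      }
    }
  }
}
--------------------------------------------------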
diff --git a/docs/reference/aggregations/bucket/geotilegrid-aggregation.asciidoc b/docs/reference/aggregations/bucket/geotilegrid-aggregation.asciidoc deleted file mode 100644 index 413cc622d5d08..0000000000000 --- a/docs/reference/aggregations/bucket/geotilegrid-aggregation.asciidoc +++ /dev/null @@ -1,270 +0,0 @@ -[[search-aggregations-bucket-geotilegrid-aggregation]] -=== Geotile grid aggregation -++++ -Geotile grid -++++ - -A multi-bucket aggregation that groups <> and -<> values into buckets that represent a grid. -The resulting grid can be sparse and only -contains cells that have matching data. Each cell corresponds to a -{wikipedia}/Tiled_web_map[map tile] as used by many online map -sites. Each cell is labeled using a "{zoom}/{x}/{y}" format, where zoom is equal -to the user-specified precision. - -* High precision keys have a larger range for x and y, and represent tiles that -cover only a small area. -* Low precision keys have a smaller range for x and y, and represent tiles that -each cover a large area. - -See https://wiki.openstreetmap.org/wiki/Zoom_levels[zoom level documentation] -on how precision (zoom) correlates to size on the ground. Precision for this -aggregation can be between 0 and 29, inclusive. - -WARNING: The highest-precision geotile of length 29 produces cells that cover -less than a 10cm by 10cm of land and so high-precision requests can be very -costly in terms of RAM and result sizes. Please see the example below on how -to first filter the aggregation to a smaller geographic area before requesting -high-levels of detail. - -You can only use `geotile_grid` to aggregate an explicitly mapped `geo_point` or -`geo_shape` field. If the `geo_point` field contains an array, `geotile_grid` -aggregates all the array values. - - -==== Simple low-precision request - -[source,console,id=geotilegrid-aggregation-example] --------------------------------------------------- -PUT /museums -{ - "mappings": { - "properties": { - "location": { - "type": "geo_point" - } - } - } -} - -POST /museums/_bulk?refresh -{"index":{"_id":1}} -{"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} -{"index":{"_id":2}} -{"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} -{"index":{"_id":3}} -{"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} -{"index":{"_id":4}} -{"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} -{"index":{"_id":5}} -{"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} -{"index":{"_id":6}} -{"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} - -POST /museums/_search?size=0 -{ - "aggregations": { - "large-grid": { - "geotile_grid": { - "field": "location", - "precision": 8 - } - } - } -} --------------------------------------------------- - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "large-grid": { - "buckets": [ - { - "key": "8/131/84", - "doc_count": 3 - }, - { - "key": "8/129/88", - "doc_count": 2 - }, - { - "key": "8/131/85", - "doc_count": 1 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -[[geotilegrid-high-precision]] -==== High-precision requests - -When requesting detailed buckets (typically for displaying a "zoomed in" map), -a filter like <> should be -applied to narrow the subject area. 
Otherwise, potentially millions of buckets -will be created and returned. - -[source,console,id=geotilegrid-high-precision-ex] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggregations": { - "zoomed-in": { - "filter": { - "geo_bounding_box": { - "location": { - "top_left": "POINT (4.9 52.4)", - "bottom_right": "POINT (5.0 52.3)" - } - } - }, - "aggregations": { - "zoom1": { - "geotile_grid": { - "field": "location", - "precision": 22 - } - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "zoomed-in": { - "doc_count": 3, - "zoom1": { - "buckets": [ - { - "key": "22/2154412/1378379", - "doc_count": 1 - }, - { - "key": "22/2154385/1378332", - "doc_count": 1 - }, - { - "key": "22/2154259/1378425", - "doc_count": 1 - } - ] - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -[[geotilegrid-addtl-bounding-box-filtering]] -==== Requests with additional bounding box filtering - -The `geotile_grid` aggregation supports an optional `bounds` parameter -that restricts the cells considered to those that intersect the -provided bounds. The `bounds` parameter accepts the same -<> -as the geo-bounding box query. This bounding box can be used with or -without an additional `geo_bounding_box` query for filtering the points prior to aggregating. -It is an independent bounding box that can intersect with, be equal to, or be disjoint -to any additional `geo_bounding_box` queries defined in the context of the aggregation. - -[source,console,id=geotilegrid-aggregation-with-bounds] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggregations": { - "tiles-in-bounds": { - "geotile_grid": { - "field": "location", - "precision": 22, - "bounds": { - "top_left": "POINT (4.9 52.4)", - "bottom_right": "POINT (5.0 52.3)" - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "tiles-in-bounds": { - "buckets": [ - { - "key": "22/2154412/1378379", - "doc_count": 1 - }, - { - "key": "22/2154385/1378332", - "doc_count": 1 - }, - { - "key": "22/2154259/1378425", - "doc_count": 1 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -[discrete] -[role="xpack"] -[[geotilegrid-aggregating-geo-shape]] -==== Aggregating `geo_shape` fields - -Aggregating on <> fields works almost as it does for points, except that a single -shape can be counted for in multiple tiles. A shape will contribute to the count of matching values -if any part of its shape intersects with that tile. Below is an image that demonstrates this: - - -image:images/spatial/geoshape_grid.png[] - -==== Options - -[horizontal] -field:: -(Required, string) Field containing indexed geo-point or geo-shape values. -Must be explicitly mapped as a <> or a <> field. -If the field contains an array, `geotile_grid` aggregates all array values. - -precision:: -(Optional, integer) Integer zoom of the key used to define cells/buckets in -the results. Defaults to `7`. Values outside of [`0`,`29`] will be rejected. 
- -bounds:: -(Optional, object) Bounding box used to filter the geo-points or geo-shapes in each bucket. -Accepts the same bounding box formats as the -<>. - -size:: -(Optional, integer) Maximum number of buckets to return. Defaults to 10,000. -When results are trimmed, buckets are prioritized based on the volume of -documents they contain. - -shard_size:: -(Optional, integer) Number of buckets returned from each shard. Defaults to -`max(10,(size x number-of-shards))` to allow for a more accurate count of the -top cells in the final result. Since each shard could have a different top result order, -using a larger number here reduces the risk of inaccurate counts, but incurs a performance cost. diff --git a/docs/reference/aggregations/bucket/global-aggregation.asciidoc b/docs/reference/aggregations/bucket/global-aggregation.asciidoc deleted file mode 100644 index 043aabc494c86..0000000000000 --- a/docs/reference/aggregations/bucket/global-aggregation.asciidoc +++ /dev/null @@ -1,69 +0,0 @@ -[[search-aggregations-bucket-global-aggregation]] -=== Global aggregation -++++ -Global -++++ - -Defines a single bucket of all the documents within the search execution -context. This context is defined by the indices and the document types you're -searching on, but is *not* influenced by the search query itself. - -NOTE: Global aggregators can only be placed as top level aggregators because - it doesn't make sense to embed a global aggregator within another - bucket aggregator. - -Example: - -[source,console,id=global-aggregation-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "query": { - "match": { "type": "t-shirt" } - }, - "aggs": { - "all_products": { - "global": {}, <1> - "aggs": { <2> - "avg_price": { "avg": { "field": "price" } } - } - }, - "t_shirts": { "avg": { "field": "price" } } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> The `global` aggregation has an empty body -<2> The sub-aggregations that are registered for this `global` aggregation - -The above aggregation demonstrates how one would compute aggregations -(`avg_price` in this example) on all the documents in the search context, -regardless of the query (in our example, it will compute the average price over -all products in our catalog, not just on the "shirts"). - -The response for the above aggregation: - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "all_products": { - "doc_count": 7, <1> - "avg_price": { - "value": 140.71428571428572 <2> - } - }, - "t_shirts": { - "value": 128.33333333333334 <3> - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -<1> The number of documents that were aggregated (in our case, all documents -within the search context) -<2> The average price of all products in the index -<3> The average price of all t-shirts diff --git a/docs/reference/aggregations/bucket/histogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/histogram-aggregation.asciidoc deleted file mode 100644 index c2ae23dccb7dc..0000000000000 --- a/docs/reference/aggregations/bucket/histogram-aggregation.asciidoc +++ /dev/null @@ -1,431 +0,0 @@ -[[search-aggregations-bucket-histogram-aggregation]] -=== Histogram aggregation -++++ -Histogram -++++ - -A multi-bucket values source based aggregation that can be applied on numeric values or numeric range values extracted -from the documents. It dynamically builds fixed size (a.k.a. interval) buckets over the values. For example, if the -documents have a field that holds a price (numeric), we can configure this aggregation to dynamically build buckets with -interval `5` (in case of price it may represent $5). When the aggregation executes, the price field of every document -will be evaluated and will be rounded down to its closest bucket - for example, if the price is `32` and the bucket size -is `5` then the rounding will yield `30` and thus the document will "fall" into the bucket that is associated with the -key `30`. -To make this more formal, here is the rounding function that is used: - -[source,java] --------------------------------------------------- -bucket_key = Math.floor((value - offset) / interval) * interval + offset --------------------------------------------------- - -For range values, a document can fall into multiple buckets. The first bucket is computed from the lower -bound of the range in the same way as a bucket for a single value is computed. The final bucket is computed in the same -way from the upper bound of the range, and the range is counted in all buckets in between and including those two. - -The `interval` must be a positive decimal, while the `offset` must be a decimal in `[0, interval)` -(a decimal greater than or equal to `0` and less than `interval`) - -The following snippet "buckets" the products based on their `price` by interval of `50`: - -[source,console,id=histogram-aggregation-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "prices": { - "histogram": { - "field": "price", - "interval": 50 - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "prices": { - "buckets": [ - { - "key": 0.0, - "doc_count": 1 - }, - { - "key": 50.0, - "doc_count": 1 - }, - { - "key": 100.0, - "doc_count": 0 - }, - { - "key": 150.0, - "doc_count": 2 - }, - { - "key": 200.0, - "doc_count": 3 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -==== Minimum document count - -The response above show that no documents has a price that falls within the range of `[100, 150)`. By default the -response will fill gaps in the histogram with empty buckets. It is possible to change that and request buckets with -a higher minimum count thanks to the `min_doc_count` setting: - -[source,console,id=histogram-aggregation-min-doc-count-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "prices": { - "histogram": { - "field": "price", - "interval": 50, - "min_doc_count": 1 - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "prices": { - "buckets": [ - { - "key": 0.0, - "doc_count": 1 - }, - { - "key": 50.0, - "doc_count": 1 - }, - { - "key": 150.0, - "doc_count": 2 - }, - { - "key": 200.0, - "doc_count": 3 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -[[search-aggregations-bucket-histogram-aggregation-extended-bounds]] -By default the `histogram` returns all the buckets within the range of the data itself, that is, the documents with -the smallest values (on which with histogram) will determine the min bucket (the bucket with the smallest key) and the -documents with the highest values will determine the max bucket (the bucket with the highest key). Often, when -requesting empty buckets, this causes a confusion, specifically, when the data is also filtered. - -To understand why, let's look at an example: - -Lets say the you're filtering your request to get all docs with values between `0` and `500`, in addition you'd like -to slice the data per price using a histogram with an interval of `50`. You also specify `"min_doc_count" : 0` as you'd -like to get all buckets even the empty ones. If it happens that all products (documents) have prices higher than `100`, -the first bucket you'll get will be the one with `100` as its key. This is confusing, as many times, you'd also like -to get those buckets between `0 - 100`. - -With `extended_bounds` setting, you now can "force" the histogram aggregation to start building buckets on a specific -`min` value and also keep on building buckets up to a `max` value (even if there are no documents anymore). Using -`extended_bounds` only makes sense when `min_doc_count` is 0 (the empty buckets will never be returned if `min_doc_count` -is greater than 0). - -Note that (as the name suggest) `extended_bounds` is **not** filtering buckets. Meaning, if the `extended_bounds.min` is higher -than the values extracted from the documents, the documents will still dictate what the first bucket will be (and the -same goes for the `extended_bounds.max` and the last bucket). For filtering buckets, one should nest the histogram aggregation -under a range `filter` aggregation with the appropriate `from`/`to` settings. 
- -Example: - -[source,console,id=histogram-aggregation-extended-bounds-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "query": { - "constant_score": { "filter": { "range": { "price": { "lte": "500" } } } } - }, - "aggs": { - "prices": { - "histogram": { - "field": "price", - "interval": 50, - "extended_bounds": { - "min": 0, - "max": 500 - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -When aggregating ranges, buckets are based on the values of the returned documents. This means the response may include -buckets outside of a query's range. For example, if your query looks for values greater than 100, and you have a range -covering 50 to 150, and an interval of 50, that document will land in 3 buckets - 50, 100, and 150. In general, it's -best to think of the query and aggregation steps as independent - the query selects a set of documents, and then the -aggregation buckets those documents without regard to how they were selected. -See <> for more information and an example. - -[[search-aggregations-bucket-histogram-aggregation-hard-bounds]] -The `hard_bounds` is a counterpart of `extended_bounds` and can limit the range of buckets in the histogram. It is -particularly useful in the case of open <> that can result in a very large number of buckets. - -Example: - -[source,console,id=histogram-aggregation-hard-bounds-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "query": { - "constant_score": { "filter": { "range": { "price": { "lte": "500" } } } } - }, - "aggs": { - "prices": { - "histogram": { - "field": "price", - "interval": 50, - "hard_bounds": { - "min": 100, - "max": 200 - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -In this example even though the range specified in the query is up to 500, the histogram will only have 2 buckets starting at 100 and 150. -All other buckets will be omitted even if documents that should go to this buckets are present in the results. - -==== Order - -By default the returned buckets are sorted by their `key` ascending, though the order behaviour can be controlled using -the `order` setting. Supports the same `order` functionality as the <>. - -==== Offset - -By default the bucket keys start with 0 and then continue in even spaced steps -of `interval`, e.g. if the interval is `10`, the first three buckets (assuming -there is data inside them) will be `[0, 10)`, `[10, 20)`, `[20, 30)`. The bucket -boundaries can be shifted by using the `offset` option. - -This can be best illustrated with an example. If there are 10 documents with values ranging from 5 to 14, using interval `10` will result in -two buckets with 5 documents each. If an additional offset `5` is used, there will be only one single bucket `[5, 15)` containing all the 10 -documents. - -==== Response Format - -By default, the buckets are returned as an ordered array. It is also possible to request the response as a hash -instead keyed by the buckets keys: - -[source,console,id=histogram-aggregation-keyed-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "prices": { - "histogram": { - "field": "price", - "interval": 50, - "keyed": true - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "prices": { - "buckets": { - "0.0": { - "key": 0.0, - "doc_count": 1 - }, - "50.0": { - "key": 50.0, - "doc_count": 1 - }, - "100.0": { - "key": 100.0, - "doc_count": 0 - }, - "150.0": { - "key": 150.0, - "doc_count": 2 - }, - "200.0": { - "key": 200.0, - "doc_count": 3 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. -By default they will be ignored but it is also possible to treat them as if they -had a value. - -[source,console,id=histogram-aggregation-missing-value-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "quantity": { - "histogram": { - "field": "quantity", - "interval": 10, - "missing": 0 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> Documents without a value in the `quantity` field will fall into the same bucket as documents that have the value `0`. - -[[search-aggregations-bucket-histogram-aggregation-histogram-fields]] -==== Histogram fields - -Running a histogram aggregation over histogram fields computes the total number of counts for each interval. - -For example, executing a histogram aggregation against the following index that stores pre-aggregated histograms -with latency metrics (in milliseconds) for different networks: - -[source,console] --------------------------------------------------- -PUT metrics_index -{ - "mappings": { - "properties": { - "network": { - "properties": { - "name": { - "type": "keyword" - } - } - }, - "latency_histo": { - "type": "histogram" - } - } - } -} - -PUT metrics_index/_doc/1?refresh -{ - "network.name" : "net-1", - "latency_histo" : { - "values" : [1, 3, 8, 12, 15], - "counts" : [3, 7, 23, 12, 6] - } -} - -PUT metrics_index/_doc/2?refresh -{ - "network.name" : "net-2", - "latency_histo" : { - "values" : [1, 6, 8, 12, 14], - "counts" : [8, 17, 8, 7, 6] - } -} - -POST /metrics_index/_search?size=0 -{ - "aggs": { - "latency_buckets": { - "histogram": { - "field": "latency_histo", - "interval": 5 - } - } - } -} --------------------------------------------------- - - -The `histogram` aggregation will sum the counts of each interval computed based on the `values` and -return the following output: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "latency_buckets": { - "buckets": [ - { - "key": 0.0, - "doc_count": 18 - }, - { - "key": 5.0, - "doc_count": 48 - }, - { - "key": 10.0, - "doc_count": 25 - }, - { - "key": 15.0, - "doc_count": 6 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -[IMPORTANT] -======== -Histogram aggregation is a bucket aggregation, which partitions documents into buckets rather than calculating metrics over fields like -metrics aggregations do. Each bucket represents a collection of documents which sub-aggregations can run on. -On the other hand, a histogram field is a pre-aggregated field representing multiple values inside a single field: -buckets of numerical data and a count of items/documents for each bucket. 
This mismatch between the histogram aggregations expected input -(expecting raw documents) and the histogram field (that provides summary information) limits the outcome of the aggregation -to only the doc counts for each bucket. - - -**Consequently, when executing a histogram aggregation over a histogram field, no sub-aggregations are allowed.** -======== - -Also, when running histogram aggregation over histogram field the `missing` parameter is not supported. diff --git a/docs/reference/aggregations/bucket/ipprefix-aggregation.asciidoc b/docs/reference/aggregations/bucket/ipprefix-aggregation.asciidoc deleted file mode 100644 index 2dee6654869f7..0000000000000 --- a/docs/reference/aggregations/bucket/ipprefix-aggregation.asciidoc +++ /dev/null @@ -1,387 +0,0 @@ -[[search-aggregations-bucket-ipprefix-aggregation]] -=== IP prefix aggregation -++++ -IP prefix -++++ - -A bucket aggregation that groups documents based on the network or sub-network of an IP address. An IP address consists of two groups of bits: the most significant bits which represent the network prefix, and the least significant bits which represent the host. - -[[ipprefix-agg-ex]] -==== Example - -For example, consider the following index: -[source,console] ----------------------------------------------- -PUT network-traffic -{ - "mappings": { - "properties": { - "ipv4": { "type": "ip" }, - "ipv6": { "type": "ip" } - } - } -} - -POST /network-traffic/_bulk?refresh -{"index":{"_id":0}} -{"ipv4":"192.168.1.10","ipv6":"2001:db8:a4f8:112a:6001:0:12:7f10"} -{"index":{"_id":1}} -{"ipv4":"192.168.1.12","ipv6":"2001:db8:a4f8:112a:6001:0:12:7f12"} -{"index":{"_id":2}} -{ "ipv4":"192.168.1.33","ipv6":"2001:db8:a4f8:112a:6001:0:12:7f33"} -{"index":{"_id":3}} -{"ipv4":"192.168.1.10","ipv6":"2001:db8:a4f8:112a:6001:0:12:7f10"} -{"index":{"_id":4}} -{"ipv4":"192.168.2.41","ipv6":"2001:db8:a4f8:112c:6001:0:12:7f41"} -{"index":{"_id":5}} -{"ipv4":"192.168.2.10","ipv6":"2001:db8:a4f8:112c:6001:0:12:7f10"} -{"index":{"_id":6}} -{"ipv4":"192.168.2.23","ipv6":"2001:db8:a4f8:112c:6001:0:12:7f23"} -{"index":{"_id":7}} -{"ipv4":"192.168.3.201","ipv6":"2001:db8:a4f8:114f:6001:0:12:7201"} -{"index":{"_id":8}} -{"ipv4":"192.168.3.107","ipv6":"2001:db8:a4f8:114f:6001:0:12:7307"} ----------------------------------------------- -// TESTSETUP - -The following aggregation groups documents into buckets. Each bucket identifies a different sub-network. The sub-network is calculated by applying a netmask with prefix length of `24` to each IP address in the `ipv4` field: - -[source,console,id=ip-prefix-ipv4-example] --------------------------------------------------- -GET /network-traffic/_search -{ - "size": 0, - "aggs": { - "ipv4-subnets": { - "ip_prefix": { - "field": "ipv4", - "prefix_length": 24 - } - } - } -} --------------------------------------------------- -// TEST - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... 
- - "aggregations": { - "ipv4-subnets": { - "buckets": [ - { - "key": "192.168.1.0", - "is_ipv6": false, - "doc_count": 4, - "prefix_length": 24, - "netmask": "255.255.255.0" - }, - { - "key": "192.168.2.0", - "is_ipv6": false, - "doc_count": 3, - "prefix_length": 24, - "netmask": "255.255.255.0" - }, - { - "key": "192.168.3.0", - "is_ipv6": false, - "doc_count": 2, - "prefix_length": 24, - "netmask": "255.255.255.0" - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -To aggregate IPv6 addresses, set `is_ipv6` to `true`. - -[source,console,id=ip-prefix-ipv6-example] --------------------------------------------------- -GET /network-traffic/_search -{ - "size": 0, - "aggs": { - "ipv6-subnets": { - "ip_prefix": { - "field": "ipv6", - "prefix_length": 64, - "is_ipv6": true - } - } - } -} --------------------------------------------------- -// TEST - -If `is_ipv6` is `true`, the response doesn't include a `netmask` for each bucket. - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "ipv6-subnets": { - "buckets": [ - { - "key": "2001:db8:a4f8:112a::", - "is_ipv6": true, - "doc_count": 4, - "prefix_length": 64 - }, - { - "key": "2001:db8:a4f8:112c::", - "is_ipv6": true, - "doc_count": 3, - "prefix_length": 64 - }, - { - "key": "2001:db8:a4f8:114f::", - "is_ipv6": true, - "doc_count": 2, - "prefix_length": 64 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -[role="child_attributes"] -[[ip-prefix-agg-params]] -==== Parameters - -`field`:: -(Required, string) -The document IP address field to aggregate on. The field mapping type must be <>. - -`prefix_length`:: -(Required, integer) -Length of the network prefix. For IPv4 addresses, the accepted range is `[0, 32]`. For IPv6 addresses, the accepted range is `[0, 128]`. - -`is_ipv6`:: -(Optional, boolean) -Defines whether the prefix applies to IPv6 addresses. Just specifying the `prefix_length` parameter is not enough to know if an IP prefix applies to IPv4 or IPv6 addresses. Defaults to `false`. - -`append_prefix_length`:: -(Optional, boolean) -Defines whether the prefix length is appended to IP address keys in the response. Defaults to `false`. - -`keyed`:: -(Optional, boolean) -Defines whether buckets are returned as a hash rather than an array in the response. Defaults to `false`. - -`min_doc_count`:: -(Optional, integer) -Defines the minimum number of documents for buckets to be included in the response. Defaults to `1`. - - -[[ipprefix-agg-response]] -==== Response body - -`key`:: -(string) -The IPv6 or IPv4 subnet. - -`prefix_length`:: -(integer) -The length of the prefix used to aggregate the bucket. - -`doc_count`:: -(integer) -Number of documents matching a specific IP prefix. - -`is_ipv6`:: -(boolean) -Defines whether the netmask is an IPv6 netmask. - -`netmask`:: -(string) -The IPv4 netmask. If `is_ipv6` is `true` in the request, this field is missing in the response. - -[[ipprefix-agg-keyed-response]] -==== Keyed Response - -Set the `keyed` flag of `true` to associate an unique IP address key with each bucket and return sub-networks as a hash rather than an array. 
- -Example: - -[source,console,id=ip-prefix-keyed-example] --------------------------------------------------- -GET /network-traffic/_search -{ - "size": 0, - "aggs": { - "ipv4-subnets": { - "ip_prefix": { - "field": "ipv4", - "prefix_length": 24, - "keyed": true - } - } - } -} --------------------------------------------------- -// TEST - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "ipv4-subnets": { - "buckets": { - "192.168.1.0": { - "is_ipv6": false, - "doc_count": 4, - "prefix_length": 24, - "netmask": "255.255.255.0" - }, - "192.168.2.0": { - "is_ipv6": false, - "doc_count": 3, - "prefix_length": 24, - "netmask": "255.255.255.0" - }, - "192.168.3.0": { - "is_ipv6": false, - "doc_count": 2, - "prefix_length": 24, - "netmask": "255.255.255.0" - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -[[ipprefix-agg-append-prefix-length]] -==== Append the prefix length to the IP address key - -Set the `append_prefix_length` flag to `true` to catenate IP address keys with the prefix length of the sub-network. - -Example: - -[source,console,id=ip-prefix-append-prefix-len-example] --------------------------------------------------- -GET /network-traffic/_search -{ - "size": 0, - "aggs": { - "ipv4-subnets": { - "ip_prefix": { - "field": "ipv4", - "prefix_length": 24, - "append_prefix_length": true - } - } - } -} --------------------------------------------------- -// TEST - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "ipv4-subnets": { - "buckets": [ - { - "key": "192.168.1.0/24", - "is_ipv6": false, - "doc_count": 4, - "prefix_length": 24, - "netmask": "255.255.255.0" - }, - { - "key": "192.168.2.0/24", - "is_ipv6": false, - "doc_count": 3, - "prefix_length": 24, - "netmask": "255.255.255.0" - }, - { - "key": "192.168.3.0/24", - "is_ipv6": false, - "doc_count": 2, - "prefix_length": 24, - "netmask": "255.255.255.0" - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -[[ipprefix-agg-min-doc-count]] -==== Minimum document count - -Use the `min_doc_count` parameter to only return buckets with a minimum number of documents. - -[source,console,id=ip-prefix-min-doc-count-example] --------------------------------------------------- -GET /network-traffic/_search -{ - "size": 0, - "aggs": { - "ipv4-subnets": { - "ip_prefix": { - "field": "ipv4", - "prefix_length": 24, - "min_doc_count": 3 - } - } - } -} --------------------------------------------------- -// TEST - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... 
- - "aggregations": { - "ipv4-subnets": { - "buckets": [ - { - "key": "192.168.1.0", - "is_ipv6": false, - "doc_count": 4, - "prefix_length": 24, - "netmask": "255.255.255.0" - }, - { - "key": "192.168.2.0", - "is_ipv6": false, - "doc_count": 3, - "prefix_length": 24, - "netmask": "255.255.255.0" - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - diff --git a/docs/reference/aggregations/bucket/iprange-aggregation.asciidoc b/docs/reference/aggregations/bucket/iprange-aggregation.asciidoc deleted file mode 100644 index 0bc95522d69ed..0000000000000 --- a/docs/reference/aggregations/bucket/iprange-aggregation.asciidoc +++ /dev/null @@ -1,205 +0,0 @@ -[[search-aggregations-bucket-iprange-aggregation]] -=== IP range aggregation -++++ -IP range -++++ - -Just like the dedicated <> range aggregation, there is also a dedicated range aggregation for IP typed fields: - -Example: - -[source,console,id=ip-range-example] --------------------------------------------------- -GET /ip_addresses/_search -{ - "size": 10, - "aggs": { - "ip_ranges": { - "ip_range": { - "field": "ip", - "ranges": [ - { "to": "10.0.0.5" }, - { "from": "10.0.0.5" } - ] - } - } - } -} --------------------------------------------------- -// TEST[setup:iprange] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "ip_ranges": { - "buckets": [ - { - "key": "*-10.0.0.5", - "to": "10.0.0.5", - "doc_count": 10 - }, - { - "key": "10.0.0.5-*", - "from": "10.0.0.5", - "doc_count": 260 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -IP ranges can also be defined as CIDR masks: - -[source,console,id=ip-range-cidr-example] --------------------------------------------------- -GET /ip_addresses/_search -{ - "size": 0, - "aggs": { - "ip_ranges": { - "ip_range": { - "field": "ip", - "ranges": [ - { "mask": "10.0.0.0/25" }, - { "mask": "10.0.0.127/25" } - ] - } - } - } -} --------------------------------------------------- -// TEST[setup:iprange] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "ip_ranges": { - "buckets": [ - { - "key": "10.0.0.0/25", - "from": "10.0.0.0", - "to": "10.0.0.128", - "doc_count": 128 - }, - { - "key": "10.0.0.127/25", - "from": "10.0.0.0", - "to": "10.0.0.128", - "doc_count": 128 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -==== Keyed Response - -Setting the `keyed` flag to `true` will associate a unique string key with each bucket and return the ranges as a hash rather than an array: - -[source,console,id=ip-range-keyed-example] --------------------------------------------------- -GET /ip_addresses/_search -{ - "size": 0, - "aggs": { - "ip_ranges": { - "ip_range": { - "field": "ip", - "ranges": [ - { "to": "10.0.0.5" }, - { "from": "10.0.0.5" } - ], - "keyed": true - } - } - } -} --------------------------------------------------- -// TEST[setup:iprange] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... 
- - "aggregations": { - "ip_ranges": { - "buckets": { - "*-10.0.0.5": { - "to": "10.0.0.5", - "doc_count": 10 - }, - "10.0.0.5-*": { - "from": "10.0.0.5", - "doc_count": 260 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -It is also possible to customize the key for each range: - -[source,console,id=ip-range-keyed-customized-keys-example] --------------------------------------------------- -GET /ip_addresses/_search -{ - "size": 0, - "aggs": { - "ip_ranges": { - "ip_range": { - "field": "ip", - "ranges": [ - { "key": "infinity", "to": "10.0.0.5" }, - { "key": "and-beyond", "from": "10.0.0.5" } - ], - "keyed": true - } - } - } -} --------------------------------------------------- -// TEST[setup:iprange] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "ip_ranges": { - "buckets": { - "infinity": { - "to": "10.0.0.5", - "doc_count": 10 - }, - "and-beyond": { - "from": "10.0.0.5", - "doc_count": 260 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] diff --git a/docs/reference/aggregations/bucket/missing-aggregation.asciidoc b/docs/reference/aggregations/bucket/missing-aggregation.asciidoc deleted file mode 100644 index 8553987778f8c..0000000000000 --- a/docs/reference/aggregations/bucket/missing-aggregation.asciidoc +++ /dev/null @@ -1,39 +0,0 @@ -[[search-aggregations-bucket-missing-aggregation]] -=== Missing aggregation -++++ -Missing -++++ - -A field data based single bucket aggregation, that creates a bucket of all documents in the current document set context that are missing a field value (effectively, missing a field or having the configured NULL value set). This aggregator will often be used in conjunction with other field data bucket aggregators (such as ranges) to return information for all the documents that could not be placed in any of the other buckets due to missing field data values. - -Example: - -[source,console,id=missing-aggregation-example] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "products_without_a_price": { - "missing": { "field": "price" } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -In the above example, we get the total number of products that do not have a price. - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "products_without_a_price": { - "doc_count": 0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] diff --git a/docs/reference/aggregations/bucket/multi-terms-aggregation.asciidoc b/docs/reference/aggregations/bucket/multi-terms-aggregation.asciidoc deleted file mode 100644 index 48688ee02ff18..0000000000000 --- a/docs/reference/aggregations/bucket/multi-terms-aggregation.asciidoc +++ /dev/null @@ -1,431 +0,0 @@ -[role="xpack"] -[[search-aggregations-bucket-multi-terms-aggregation]] -=== Multi Terms aggregation -++++ -Multi Terms -++++ - -A multi-bucket value source based aggregation where buckets are dynamically built - one per unique set of values. 
The multi terms -aggregation is very similar to the <>, however in most cases -it will be slower than the terms aggregation and will consume more memory. Therefore, if the same set of fields is constantly used, -it would be more efficient to index a combined key for this fields as a separate field and use the terms aggregation on this field. - -The multi_term aggregations are the most useful when you need to sort by a number of document or a metric aggregation on a composite -key and get top N results. If sorting is not required and all values are expected to be retrieved using nested terms aggregation or -<> will be a faster and more memory efficient solution. - -////////////////////////// - -[source,js] --------------------------------------------------- -PUT /products -{ - "mappings": { - "properties": { - "genre": { - "type": "keyword" - }, - "product": { - "type": "keyword" - }, - "quantity": { - "type": "integer" - } - } - } -} - -POST /products/_bulk?refresh -{"index":{"_id":0}} -{"genre": "rock", "product": "Product A", "quantity": 4} -{"index":{"_id":1}} -{"genre": "rock", "product": "Product A", "quantity": 5} -{"index":{"_id":2}} -{"genre": "rock", "product": "Product B", "quantity": 1} -{"index":{"_id":3}} -{"genre": "jazz", "product": "Product B", "quantity": 10} -{"index":{"_id":4}} -{"genre": "electronic", "product": "Product B", "quantity": 3} -{"index":{"_id":5}} -{"genre": "electronic"} - -------------------------------------------------- -// NOTCONSOLE -// TESTSETUP - -////////////////////////// - -Example: - -[source,console,id=multi-terms-aggregation-example] --------------------------------------------------- -GET /products/_search -{ - "aggs": { - "genres_and_products": { - "multi_terms": { - "terms": [{ - "field": "genre" <1> - }, { - "field": "product" - }] - } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search\?filter_path=aggregations/] - -<1> `multi_terms` aggregation can work with the same field types as a -<> and supports most of the terms aggregation parameters. - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations" : { - "genres_and_products" : { - "doc_count_error_upper_bound" : 0, <1> - "sum_other_doc_count" : 0, <2> - "buckets" : [ <3> - { - "key" : [ <4> - "rock", - "Product A" - ], - "key_as_string" : "rock|Product A", - "doc_count" : 2 - }, - { - "key" : [ - "electronic", - "Product B" - ], - "key_as_string" : "electronic|Product B", - "doc_count" : 1 - }, - { - "key" : [ - "jazz", - "Product B" - ], - "key_as_string" : "jazz|Product B", - "doc_count" : 1 - }, - { - "key" : [ - "rock", - "Product B" - ], - "key_as_string" : "rock|Product B", - "doc_count" : 1 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] - -<1> an upper bound of the error on the document counts for each term, see < -<2> when there are lots of unique terms, Elasticsearch only returns the top terms; this number is the sum of the document counts for all buckets that are not part of the response -<3> the list of the top buckets. -<4> the keys are arrays of values ordered the same ways as expression in the `terms` parameter of the aggregation - -By default, the `multi_terms` aggregation will return the buckets for the top ten terms ordered by the `doc_count`. One can -change this default behaviour by setting the `size` parameter. 
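For instance (a sketch, not from the original examples), limiting the response to the top three composite buckets only requires adding `size` alongside the `terms` array; the full list of supported parameters follows below:

[source,console]
--------------------------------------------------
GET /products/_search
{
  "aggs": {
    "genres_and_products": {
      "multi_terms": {
        "size": 3,
        "terms": [
          { "field": "genre" },
          { "field": "product" }
        ]
      }
    }
  }
}
--------------------------------------------------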
- -[[search-aggregations-bucket-multi-terms-aggregation-parameters]] -==== Aggregation Parameters - -The following parameters are supported. See <> for more detailed -explanation of these parameters. - -[horizontal] -size:: Optional. Defines how many term buckets should be returned out of the overall terms list. Defaults to 10. - -shard_size:: Optional. The higher the requested `size` is, the more accurate the results will be, but also, the more - expensive it will be to compute the final results. The default `shard_size` is `(size * 1.5 + 10)`. - -show_term_doc_count_error:: Optional. Calculates the doc count error on per term basis. Defaults to `false` - -order:: Optional. Specifies the order of the buckets. Defaults to the number of documents per bucket. The bucket terms - value is used as a tiebreaker for buckets with the same document count. - -min_doc_count:: Optional. The minimal number of documents in a bucket for it to be returned. Defaults to 1. - -shard_min_doc_count:: Optional. The minimal number of documents in a bucket on each shard for it to be returned. Defaults to - `min_doc_count`. - -collect_mode:: Optional. Specifies the strategy for data collection. The `depth_first` or `breadth_first` modes are - supported. Defaults to `breadth_first`. - - -[[search-aggregations-bucket-multi-terms-aggregation-script]] -==== Script - -Generating the terms using a script: - -[source,console,id=multi-terms-aggregation-runtime-field-example] ----- -GET /products/_search -{ - "runtime_mappings": { - "genre.length": { - "type": "long", - "script": "emit(doc['genre'].value.length())" - } - }, - "aggs": { - "genres_and_products": { - "multi_terms": { - "terms": [ - { - "field": "genre.length" - }, - { - "field": "product" - } - ] - } - } - } -} ----- -// TEST[s/_search/_search\?filter_path=aggregations/] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations" : { - "genres_and_products" : { - "doc_count_error_upper_bound" : 0, - "sum_other_doc_count" : 0, - "buckets" : [ - { - "key" : [ - 4, - "Product A" - ], - "key_as_string" : "4|Product A", - "doc_count" : 2 - }, - { - "key" : [ - 4, - "Product B" - ], - "key_as_string" : "4|Product B", - "doc_count" : 2 - }, - { - "key" : [ - 10, - "Product B" - ], - "key_as_string" : "10|Product B", - "doc_count" : 1 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. -By default if any of the key components are missing the entire document will be ignored -but it is also possible to treat them as if they had a value by using the `missing` parameter. - -[source,console,id=multi-terms-aggregation-missing-example] --------------------------------------------------- -GET /products/_search -{ - "aggs": { - "genres_and_products": { - "multi_terms": { - "terms": [ - { - "field": "genre" - }, - { - "field": "product", - "missing": "Product Z" - } - ] - } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search\?filter_path=aggregations/] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations" : { - "genres_and_products" : { - "doc_count_error_upper_bound" : 0, - "sum_other_doc_count" : 0, - "buckets" : [ - { - "key" : [ - "rock", - "Product A" - ], - "key_as_string" : "rock|Product A", - "doc_count" : 2 - }, - { - "key" : [ - "electronic", - "Product B" - ], - "key_as_string" : "electronic|Product B", - "doc_count" : 1 - }, - { - "key" : [ - "electronic", - "Product Z" - ], - "key_as_string" : "electronic|Product Z", <1> - "doc_count" : 1 - }, - { - "key" : [ - "jazz", - "Product B" - ], - "key_as_string" : "jazz|Product B", - "doc_count" : 1 - }, - { - "key" : [ - "rock", - "Product B" - ], - "key_as_string" : "rock|Product B", - "doc_count" : 1 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] - -<1> Documents without a value in the `product` field will fall into the same bucket as documents that have the value `Product Z`. - -==== Mixing field types - -WARNING: When aggregating on multiple indices the type of the aggregated field may not be the same in all indices. -Some types are compatible with each other (`integer` and `long` or `float` and `double`) but when the types are a mix -of decimal and non-decimal number the terms aggregation will promote the non-decimal numbers to decimal numbers. -This can result in a loss of precision in the bucket values. - -==== Sub aggregation and sorting examples - -As most bucket aggregations the `multi_term` supports sub aggregations and ordering the buckets by metrics sub-aggregation: - -[source,console,id=multi-terms-aggregation-subaggregation-example] --------------------------------------------------- -GET /products/_search -{ - "aggs": { - "genres_and_products": { - "multi_terms": { - "terms": [ - { - "field": "genre" - }, - { - "field": "product" - } - ], - "order": { - "total_quantity": "desc" - } - }, - "aggs": { - "total_quantity": { - "sum": { - "field": "quantity" - } - } - } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search\?filter_path=aggregations/] - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations" : { - "genres_and_products" : { - "doc_count_error_upper_bound" : 0, - "sum_other_doc_count" : 0, - "buckets" : [ - { - "key" : [ - "jazz", - "Product B" - ], - "key_as_string" : "jazz|Product B", - "doc_count" : 1, - "total_quantity" : { - "value" : 10.0 - } - }, - { - "key" : [ - "rock", - "Product A" - ], - "key_as_string" : "rock|Product A", - "doc_count" : 2, - "total_quantity" : { - "value" : 9.0 - } - }, - { - "key" : [ - "electronic", - "Product B" - ], - "key_as_string" : "electronic|Product B", - "doc_count" : 1, - "total_quantity" : { - "value" : 3.0 - } - }, - { - "key" : [ - "rock", - "Product B" - ], - "key_as_string" : "rock|Product B", - "doc_count" : 1, - "total_quantity" : { - "value" : 1.0 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] diff --git a/docs/reference/aggregations/bucket/nested-aggregation.asciidoc b/docs/reference/aggregations/bucket/nested-aggregation.asciidoc deleted file mode 100644 index b7403599119c2..0000000000000 --- a/docs/reference/aggregations/bucket/nested-aggregation.asciidoc +++ /dev/null @@ -1,175 +0,0 @@ -[[search-aggregations-bucket-nested-aggregation]] -=== Nested aggregation -++++ -Nested -++++ - -A special single bucket aggregation that enables aggregating nested documents. 
- -For example, lets say we have an index of products, and each product holds the list of resellers - each having its own -price for the product. The mapping could look like: - -[source,console,id=nested-aggregation-example] ----- -PUT /products -{ - "mappings": { - "properties": { - "resellers": { <1> - "type": "nested", - "properties": { - "reseller": { - "type": "keyword" - }, - "price": { - "type": "double" - } - } - } - } - } -} ----- -<1> `resellers` is an array that holds nested documents. - -The following request adds a product with two resellers: - -[source,console] ----- -PUT /products/_doc/0?refresh -{ - "name": "LED TV", <1> - "resellers": [ - { - "reseller": "companyA", - "price": 350 - }, - { - "reseller": "companyB", - "price": 500 - } - ] -} ----- -// TEST[continued] - -<1> We are using a dynamic mapping for the `name` attribute. - - -The following request returns the minimum price a product can be purchased for: - -[source,console] ----- -GET /products/_search?size=0 -{ - "query": { - "match": { - "name": "led tv" - } - }, - "aggs": { - "resellers": { - "nested": { - "path": "resellers" - }, - "aggs": { - "min_price": { - "min": { - "field": "resellers.price" - } - } - } - } - } -} ----- -// TEST[s/size=0/size=0&filter_path=aggregations/] -// TEST[continued] - -As you can see above, the nested aggregation requires the `path` of the nested documents within the top level documents. -Then one can define any type of aggregation over these nested documents. - -Response: - -[source,console-result] ----- -{ - ... - "aggregations": { - "resellers": { - "doc_count": 2, - "min_price": { - "value": 350.0 - } - } - } -} ----- -// TESTRESPONSE[s/\.\.\.//] - -You can use a <> -sub-aggregation to return results for a specific reseller. - -[source,console] ----- -GET /products/_search?size=0 -{ - "query": { - "match": { - "name": "led tv" - } - }, - "aggs": { - "resellers": { - "nested": { - "path": "resellers" - }, - "aggs": { - "filter_reseller": { - "filter": { - "bool": { - "filter": [ - { - "term": { - "resellers.reseller": "companyB" - } - } - ] - } - }, - "aggs": { - "min_price": { - "min": { - "field": "resellers.price" - } - } - } - } - } - } - } -} ----- -// TEST[s/size=0/size=0&filter_path=aggregations/] -// TEST[continued] - -The search returns: - -[source,console-result] ----- -{ - ... - "aggregations": { - "resellers": { - "doc_count": 2, - "filter_reseller": { - "doc_count": 1, - "min_price": { - "value": 500.0 - } - } - } - } -} ----- -// TESTRESPONSE[s/\.\.\.//] diff --git a/docs/reference/aggregations/bucket/parent-aggregation.asciidoc b/docs/reference/aggregations/bucket/parent-aggregation.asciidoc deleted file mode 100644 index e53bb50fedc52..0000000000000 --- a/docs/reference/aggregations/bucket/parent-aggregation.asciidoc +++ /dev/null @@ -1,213 +0,0 @@ -[[search-aggregations-bucket-parent-aggregation]] -=== Parent aggregation -++++ -Parent -++++ - -A special single bucket aggregation that selects parent documents that have the specified type, as defined in a <>. - -This aggregation has a single option: - -* `type` - The child type that should be selected. - -For example, let's say we have an index of questions and answers. 
The answer type has the following `join` field in the mapping: - -[source,console,id=parent-aggregation-example] --------------------------------------------------- -PUT parent_example -{ - "mappings": { - "properties": { - "join": { - "type": "join", - "relations": { - "question": "answer" - } - } - } - } -} --------------------------------------------------- - -The `question` document contain a tag field and the `answer` documents contain an owner field. With the `parent` -aggregation the owner buckets can be mapped to the tag buckets in a single request even though the two fields exist in -two different kinds of documents. - -An example of a question document: - -[source,console] --------------------------------------------------- -PUT parent_example/_doc/1 -{ - "join": { - "name": "question" - }, - "body": "
I have Windows 2003 server and i bought a new Windows 2008 server...", - "title": "Whats the best way to file transfer my site from server to a newer one?", - "tags": [ - "windows-server-2003", - "windows-server-2008", - "file-transfer" - ] -} --------------------------------------------------- -// TEST[continued] - -Examples of `answer` documents: - -[source,console] --------------------------------------------------- -PUT parent_example/_doc/2?routing=1 -{ - "join": { - "name": "answer", - "parent": "1" - }, - "owner": { - "location": "Norfolk, United Kingdom", - "display_name": "Sam", - "id": 48 - }, - "body": "
Unfortunately you're pretty much limited to FTP...", - "creation_date": "2009-05-04T13:45:37.030" -} - -PUT parent_example/_doc/3?routing=1&refresh -{ - "join": { - "name": "answer", - "parent": "1" - }, - "owner": { - "location": "Norfolk, United Kingdom", - "display_name": "Troll", - "id": 49 - }, - "body": "
Use Linux...", - "creation_date": "2009-05-05T13:45:37.030" -} --------------------------------------------------- -// TEST[continued] - -The following request can be built that connects the two together: - -[source,console] --------------------------------------------------- -POST parent_example/_search?size=0 -{ - "aggs": { - "top-names": { - "terms": { - "field": "owner.display_name.keyword", - "size": 10 - }, - "aggs": { - "to-questions": { - "parent": { - "type" : "answer" <1> - }, - "aggs": { - "top-tags": { - "terms": { - "field": "tags.keyword", - "size": 10 - } - } - } - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -<1> The `type` points to type / mapping with the name `answer`. - -The above example returns the top answer owners and per owner the top question tags. - -Possible response: - -[source,console-result] --------------------------------------------------- -{ - "took": 9, - "timed_out": false, - "_shards": { - "total": 1, - "successful": 1, - "skipped": 0, - "failed": 0 - }, - "hits": { - "total" : { - "value": 3, - "relation": "eq" - }, - "max_score": null, - "hits": [] - }, - "aggregations": { - "top-names": { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "Sam", - "doc_count": 1, <1> - "to-questions": { - "doc_count": 1, <2> - "top-tags": { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "file-transfer", - "doc_count": 1 - }, - { - "key": "windows-server-2003", - "doc_count": 1 - }, - { - "key": "windows-server-2008", - "doc_count": 1 - } - ] - } - } - }, - { - "key": "Troll", - "doc_count": 1, - "to-questions": { - "doc_count": 1, - "top-tags": { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "file-transfer", - "doc_count": 1 - }, - { - "key": "windows-server-2003", - "doc_count": 1 - }, - { - "key": "windows-server-2008", - "doc_count": 1 - } - ] - } - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 9/"took": $body.took/] - -<1> The number of answer documents with the tag `Sam`, `Troll`, etc. -<2> The number of question documents that are related to answer documents with the tag `Sam`, `Troll`, etc. diff --git a/docs/reference/aggregations/bucket/random-sampler-aggregation.asciidoc b/docs/reference/aggregations/bucket/random-sampler-aggregation.asciidoc deleted file mode 100644 index 9fc533104024d..0000000000000 --- a/docs/reference/aggregations/bucket/random-sampler-aggregation.asciidoc +++ /dev/null @@ -1,121 +0,0 @@ -[[search-aggregations-random-sampler-aggregation]] -=== Random sampler aggregation -++++ -Random sampler -++++ - -experimental::[] - -The `random_sampler` aggregation is a single bucket aggregation that randomly -includes documents in the aggregated results. Sampling provides significant -speed improvement at the cost of accuracy. - -The sampling is accomplished by providing a random subset of the entire set of -documents in a shard. If a filter query is provided in the search request, that -filter is applied over the sampled subset. Consequently, if a filter is -restrictive, very few documents might match; therefore, the statistics might not -be as accurate. - -NOTE: This aggregation is not to be confused with the -<>. The -sampler aggregation is not over all documents; rather, it samples the first `n` -documents matched by the query. 
- -[source,console] ----- -GET kibana_sample_data_ecommerce/_search?size=0&track_total_hits=false -{ - "aggregations": { - "sampling": { - "random_sampler": { - "probability": 0.1 - }, - "aggs": { - "price_percentiles": { - "percentiles": { - "field": "taxful_total_price" - } - } - } - } - } -} ----- -// TEST[setup:kibana_sample_data_ecommerce] - -[[random-sampler-top-level-params]] -==== Top-level parameters for random_sampler - -`probability`:: -(Required, float) The probability that a document will be included in the -aggregated data. Must be greater than 0, less than `0.5`, or exactly `1`. The -lower the probability, the fewer documents are matched. - -`seed`:: -(Optional, integer) The seed to generate the random sampling of documents. When -a seed is provided, the random subset of documents is the same between calls. - -[[random-sampler-inner-workings]] -==== How does the sampling work? - -The aggregation is a random sample of all the documents in the index. In other -words, the sampling is over the background set of documents. If a query is -provided, a document is returned if it is matched by the query and if the -document is in the random sampling. The sampling is not done over the matched -documents. - -Consider the set of documents `[1, 2, 3, 4, 5]`. Your query matches `[1, 3, 5]` -and the randomly sampled set is `[2, 4, 5]`. In this case, the document returned -would be `[5]`. - -This type of sampling provides almost linear improvement in query latency in relation to the amount -by which sampling reduces the document set size: - -image::images/aggregations/random-sampler-agg-graph.png[Graph of the median speedup by sampling factor,align="center"] - -This graph is typical of the speed up for the majority of aggregations for a test data set of 63 million documents. The exact constants will depend on the data set size and the number of shards, but the form of the relationship between speed up and probability holds widely. For certain aggregations, the speed up may not -be as dramatic. These aggregations have some constant overhead unrelated to the number of documents seen. Even for -those aggregations, the speed improvements can be significant. - -The sample set is generated by skipping documents using a geometric distribution -(`(1-p)^(k-1)*p`) with success probability being the provided `probability` (`p` in the distribution equation). -The values returned from the distribution indicate how many documents to skip in -the background. This is equivalent to selecting documents uniformly at random. It follows that the expected number of failures before a success is -`(1-p)/p`. For example, with the `"probability": 0.01`, the expected number of failures (or -average number of documents skipped) would be `99` with a variance of `9900`. -Consequently, if you had only 80 documents in your index or matched by your -filter, you would most likely receive no results. - -image::images/aggregations/relative-error-vs-doc-count.png[Graph of the relative error by sampling probability and doc count,align="center"] - -In the above image `p` is the probability provided to the aggregation, and `n` is the number of documents matched by whatever -query is provided. You can see the impact of outliers on `sum` and `mean`, but when many documents are still matched at -higher sampling rates, the relative error is still low. - -NOTE: This represents the result of aggregations against a typical positively skewed APM data set which also has outliers in the upper tail. 
The linear dependence of the relative error on the sample size is found to hold widely, but the slope depends on the variation in the quantity being aggregated. As such, the variance in your own data may - cause relative error rates to increase or decrease at a different rate. -[[random-sampler-consistency]] -==== Random sampler consistency - -For a given `probability` and `seed`, the random sampler aggregation is consistent when sampling unchanged data from the same shard. -However, this is background random sampling if a particular document is included in the sampled set or not is dependent on current number of segments. - -Meaning, replica vs. primary shards could return different values as different particular documents are sampled. - -If the shard changes in via doc addition, update, deletion, or segment merging, the particular documents sampled could change, and thus the resulting statistics could change. - -The resulting statistics used from the random sampler aggregation are approximate and should be treated as such. - -[[random-sampler-special-cases]] -==== Random sampling special cases - -All counts returned by the random sampler aggregation are scaled to ease visualizations and calculations. For example, -when randomly sampling a <> every -`doc_count` value for every bucket is scaled by the inverse of the random_sampler `probability` value. So, if `doc_count` -for a bucket is `10,000` with `probability: 0.1`, the actual number of documents aggregated is `1,000`. - -An exception to this is <>. Unique item -counts are not suitable for automatic scaling. When interpreting the cardinality count, compare it -to the number of sampled docs provided in the top level `doc_count` within the random_sampler aggregation. It gives -you an idea of unique values as a percentage of total values. It may not reflect, however, the exact number of unique values -for the given field. diff --git a/docs/reference/aggregations/bucket/range-aggregation.asciidoc b/docs/reference/aggregations/bucket/range-aggregation.asciidoc deleted file mode 100644 index 07095882c2f04..0000000000000 --- a/docs/reference/aggregations/bucket/range-aggregation.asciidoc +++ /dev/null @@ -1,443 +0,0 @@ -[[search-aggregations-bucket-range-aggregation]] -=== Range aggregation -++++ -Range -++++ - -A multi-bucket value source based aggregation that enables the user to define a set of ranges - each representing a bucket. During the aggregation process, the values extracted from each document will be checked against each bucket range and "bucket" the relevant/matching document. -Note that this aggregation includes the `from` value and excludes the `to` value for each range. - -Example: - -[source,console,id=range-aggregation-example] ----- -GET sales/_search -{ - "aggs": { - "price_ranges": { - "range": { - "field": "price", - "ranges": [ - { "to": 100.0 }, - { "from": 100.0, "to": 200.0 }, - { "from": 200.0 } - ] - } - } - } -} ----- -// TEST[setup:sales] -// TEST[s/_search/_search\?filter_path=aggregations/] - -Response: - -[source,console-result] ----- -{ - ... 
- "aggregations": { - "price_ranges": { - "buckets": [ - { - "key": "*-100.0", - "to": 100.0, - "doc_count": 2 - }, - { - "key": "100.0-200.0", - "from": 100.0, - "to": 200.0, - "doc_count": 2 - }, - { - "key": "200.0-*", - "from": 200.0, - "doc_count": 3 - } - ] - } - } -} ----- -// TESTRESPONSE[s/\.\.\.//] - -==== Keyed Response - -Setting the `keyed` flag to `true` will associate a unique string key with each bucket and return the ranges as a hash rather than an array: - -[source,console,id=range-aggregation-keyed-example] ----- -GET sales/_search -{ - "aggs": { - "price_ranges": { - "range": { - "field": "price", - "keyed": true, - "ranges": [ - { "to": 100 }, - { "from": 100, "to": 200 }, - { "from": 200 } - ] - } - } - } -} ----- -// TEST[setup:sales] -// TEST[s/_search/_search\?filter_path=aggregations/] - -Response: - -[source,console-result] ----- -{ - ... - "aggregations": { - "price_ranges": { - "buckets": { - "*-100.0": { - "to": 100.0, - "doc_count": 2 - }, - "100.0-200.0": { - "from": 100.0, - "to": 200.0, - "doc_count": 2 - }, - "200.0-*": { - "from": 200.0, - "doc_count": 3 - } - } - } - } -} ----- -// TESTRESPONSE[s/\.\.\.//] - -It is also possible to customize the key for each range: - -[source,console,id=range-aggregation-custom-keys-example] ----- -GET sales/_search -{ - "aggs": { - "price_ranges": { - "range": { - "field": "price", - "keyed": true, - "ranges": [ - { "key": "cheap", "to": 100 }, - { "key": "average", "from": 100, "to": 200 }, - { "key": "expensive", "from": 200 } - ] - } - } - } -} ----- -// TEST[setup:sales] -// TEST[s/_search/_search\?filter_path=aggregations/] - -Response: - -[source,console-result] ----- -{ - ... - "aggregations": { - "price_ranges": { - "buckets": { - "cheap": { - "to": 100.0, - "doc_count": 2 - }, - "average": { - "from": 100.0, - "to": 200.0, - "doc_count": 2 - }, - "expensive": { - "from": 200.0, - "doc_count": 3 - } - } - } - } -} ----- -// TESTRESPONSE[s/\.\.\.//] - -==== Script - -If the data in your documents doesn't exactly match what you'd like to aggregate, -use a <>. 
For example, if you need to -apply a particular currency conversion rate: - -[source,console,id=range-aggregation-runtime-field-example] ----- -GET sales/_search -{ - "runtime_mappings": { - "price.euros": { - "type": "double", - "script": { - "source": """ - emit(doc['price'].value * params.conversion_rate) - """, - "params": { - "conversion_rate": 0.835526591 - } - } - } - }, - "aggs": { - "price_ranges": { - "range": { - "field": "price.euros", - "ranges": [ - { "to": 100 }, - { "from": 100, "to": 200 }, - { "from": 200 } - ] - } - } - } -} ----- -// TEST[setup:sales] -// TEST[s/_search/_search\?filter_path=aggregations/] - -////////////////////////// - -[source,console-result] ----- -{ - "aggregations": { - "price_ranges": { - "buckets": [ - { - "key": "*-100.0", - "to": 100.0, - "doc_count": 2 - }, - { - "key": "100.0-200.0", - "from": 100.0, - "to": 200.0, - "doc_count": 5 - }, - { - "key": "200.0-*", - "from": 200.0, - "doc_count": 0 - } - ] - } - } -} ----- - -////////////////////////// - -==== Sub Aggregations - -The following example, not only "bucket" the documents to the different buckets but also computes statistics over the prices in each price range - -[source,console,id=range-aggregation-sub-aggregation-example] ----- -GET sales/_search -{ - "aggs": { - "price_ranges": { - "range": { - "field": "price", - "ranges": [ - { "to": 100 }, - { "from": 100, "to": 200 }, - { "from": 200 } - ] - }, - "aggs": { - "price_stats": { - "stats": { "field": "price" } - } - } - } - } -} ----- -// TEST[setup:sales] -// TEST[s/_search/_search\?filter_path=aggregations/] - -Response: - -[source,console-result] ----- -{ - ... - "aggregations": { - "price_ranges": { - "buckets": [ - { - "key": "*-100.0", - "to": 100.0, - "doc_count": 2, - "price_stats": { - "count": 2, - "min": 10.0, - "max": 50.0, - "avg": 30.0, - "sum": 60.0 - } - }, - { - "key": "100.0-200.0", - "from": 100.0, - "to": 200.0, - "doc_count": 2, - "price_stats": { - "count": 2, - "min": 150.0, - "max": 175.0, - "avg": 162.5, - "sum": 325.0 - } - }, - { - "key": "200.0-*", - "from": 200.0, - "doc_count": 3, - "price_stats": { - "count": 3, - "min": 200.0, - "max": 200.0, - "avg": 200.0, - "sum": 600.0 - } - } - ] - } - } -} ----- -// TESTRESPONSE[s/\.\.\.//] - -[[search-aggregations-bucket-range-aggregation-histogram-fields]] -==== Histogram fields - -Running a range aggregation over histogram fields computes the total number of counts for each configured range. - -This is done without interpolating between the histogram field values. Consequently, it is possible to have a range -that is "in-between" two histogram values. The resulting range bucket would have a zero doc count. 
- -Here is an example, executing a range aggregation against the following index that stores pre-aggregated histograms -with latency metrics (in milliseconds) for different networks: - -[source,console] ----- -PUT metrics_index -{ - "mappings": { - "properties": { - "network": { - "properties": { - "name": { - "type": "keyword" - } - } - }, - "latency_histo": { - "type": "histogram" - } - } - } -} - -PUT metrics_index/_doc/1?refresh -{ - "network.name" : "net-1", - "latency_histo" : { - "values" : [1, 3, 8, 12, 15], - "counts" : [3, 7, 23, 12, 6] - } -} - -PUT metrics_index/_doc/2?refresh -{ - "network.name" : "net-2", - "latency_histo" : { - "values" : [1, 6, 8, 12, 14], - "counts" : [8, 17, 8, 7, 6] - } -} - -GET metrics_index/_search?size=0&filter_path=aggregations -{ - "aggs": { - "latency_ranges": { - "range": { - "field": "latency_histo", - "ranges": [ - {"to": 2}, - {"from": 2, "to": 3}, - {"from": 3, "to": 10}, - {"from": 10} - ] - } - } - } -} ----- - -The `range` aggregation will sum the counts of each range computed based on the `values` and -return the following output: - -[source,console-result] ----- -{ - "aggregations": { - "latency_ranges": { - "buckets": [ - { - "key": "*-2.0", - "to": 2.0, - "doc_count": 11 - }, - { - "key": "2.0-3.0", - "from": 2.0, - "to": 3.0, - "doc_count": 0 - }, - { - "key": "3.0-10.0", - "from": 3.0, - "to": 10.0, - "doc_count": 55 - }, - { - "key": "10.0-*", - "from": 10.0, - "doc_count": 31 - } - ] - } - } -} ----- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -[IMPORTANT] -======== -Range aggregation is a bucket aggregation, which partitions documents into buckets rather than calculating metrics over fields like -metrics aggregations do. Each bucket represents a collection of documents which sub-aggregations can run on. -On the other hand, a histogram field is a pre-aggregated field representing multiple values inside a single field: -buckets of numerical data and a count of items/documents for each bucket. This mismatch between the range aggregations expected input -(expecting raw documents) and the histogram field (that provides summary information) limits the outcome of the aggregation -to only the doc counts for each bucket. - -**Consequently, when executing a range aggregation over a histogram field, no sub-aggregations are allowed.** -======== diff --git a/docs/reference/aggregations/bucket/range-field-note.asciidoc b/docs/reference/aggregations/bucket/range-field-note.asciidoc deleted file mode 100644 index bb18bd2afd447..0000000000000 --- a/docs/reference/aggregations/bucket/range-field-note.asciidoc +++ /dev/null @@ -1,190 +0,0 @@ -[[search-aggregations-bucket-range-field-note]] -=== Subtleties of bucketing range fields - -==== Documents are counted for each bucket they land in - -Since a range represents multiple values, running a bucket aggregation over a -range field can result in the same document landing in multiple buckets. This -can lead to surprising behavior, such as the sum of bucket counts being higher -than the number of matched documents. 
For example, consider the following -index: -[source, console] --------------------------------------------------- -PUT range_index -{ - "settings": { - "number_of_shards": 2 - }, - "mappings": { - "properties": { - "expected_attendees": { - "type": "integer_range" - }, - "time_frame": { - "type": "date_range", - "format": "yyyy-MM-dd||epoch_millis" - } - } - } -} - -PUT range_index/_doc/1?refresh -{ - "expected_attendees" : { - "gte" : 10, - "lte" : 20 - }, - "time_frame" : { - "gte" : "2019-10-28", - "lte" : "2019-11-04" - } -} --------------------------------------------------- -// TESTSETUP - -The range is wider than the interval in the following aggregation, and thus the -document will land in multiple buckets. - -[source, console,id=range-field-aggregation-example] --------------------------------------------------- -POST /range_index/_search?size=0 -{ - "aggs": { - "range_histo": { - "histogram": { - "field": "expected_attendees", - "interval": 5 - } - } - } -} --------------------------------------------------- - -Since the interval is `5` (and the offset is `0` by default), we expect buckets `10`, -`15`, and `20`. Our range document will fall in all three of these buckets. - -[source, console-result] --------------------------------------------------- -{ - ... - "aggregations" : { - "range_histo" : { - "buckets" : [ - { - "key" : 10.0, - "doc_count" : 1 - }, - { - "key" : 15.0, - "doc_count" : 1 - }, - { - "key" : 20.0, - "doc_count" : 1 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -A document cannot exist partially in a bucket; For example, the above document -cannot count as one-third in each of the above three buckets. In this example, -since the document's range landed in multiple buckets, the full value of that -document would also be counted in any sub-aggregations for each bucket as well. - -==== Query bounds are not aggregation filters - -Another unexpected behavior can arise when a query is used to filter on the -field being aggregated. In this case, a document could match the query but -still have one or both of the endpoints of the range outside the query. -Consider the following aggregation on the above document: - -[source, console,id=range-field-aggregation-query-bounds-example] --------------------------------------------------- -POST /range_index/_search?size=0 -{ - "query": { - "range": { - "time_frame": { - "gte": "2019-11-01", - "format": "yyyy-MM-dd" - } - } - }, - "aggs": { - "november_data": { - "date_histogram": { - "field": "time_frame", - "calendar_interval": "day", - "format": "yyyy-MM-dd" - } - } - } -} --------------------------------------------------- - -Even though the query only considers days in November, the aggregation -generates 8 buckets (4 in October, 4 in November) because the aggregation is -calculated over the ranges of all matching documents. - -[source, console-result] --------------------------------------------------- -{ - ... 
- "aggregations" : { - "november_data" : { - "buckets" : [ - { - "key_as_string" : "2019-10-28", - "key" : 1572220800000, - "doc_count" : 1 - }, - { - "key_as_string" : "2019-10-29", - "key" : 1572307200000, - "doc_count" : 1 - }, - { - "key_as_string" : "2019-10-30", - "key" : 1572393600000, - "doc_count" : 1 - }, - { - "key_as_string" : "2019-10-31", - "key" : 1572480000000, - "doc_count" : 1 - }, - { - "key_as_string" : "2019-11-01", - "key" : 1572566400000, - "doc_count" : 1 - }, - { - "key_as_string" : "2019-11-02", - "key" : 1572652800000, - "doc_count" : 1 - }, - { - "key_as_string" : "2019-11-03", - "key" : 1572739200000, - "doc_count" : 1 - }, - { - "key_as_string" : "2019-11-04", - "key" : 1572825600000, - "doc_count" : 1 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -Depending on the use case, a `CONTAINS` query could limit the documents to only -those that fall entirely in the queried range. In this example, the one -document would not be included and the aggregation would be empty. Filtering -the buckets after the aggregation is also an option, for use cases where the -document should be counted but the out of bounds data can be safely ignored. diff --git a/docs/reference/aggregations/bucket/rare-terms-aggregation.asciidoc b/docs/reference/aggregations/bucket/rare-terms-aggregation.asciidoc deleted file mode 100644 index d02b75db96af0..0000000000000 --- a/docs/reference/aggregations/bucket/rare-terms-aggregation.asciidoc +++ /dev/null @@ -1,355 +0,0 @@ -[[search-aggregations-bucket-rare-terms-aggregation]] -=== Rare terms aggregation -++++ -Rare terms -++++ - -A multi-bucket value source based aggregation which finds "rare" terms -- terms that are at the long-tail -of the distribution and are not frequent. Conceptually, this is like a `terms` aggregation that is -sorted by `_count` ascending. As noted in the <>, -actually ordering a `terms` agg by count ascending has unbounded error. Instead, you should use the `rare_terms` -aggregation - -////////////////////////// - -[source,js] --------------------------------------------------- -PUT /products -{ - "mappings": { - "properties": { - "genre": { - "type": "keyword" - }, - "product": { - "type": "keyword" - } - } - } -} - -POST /products/_bulk?refresh -{"index":{"_id":0}} -{"genre": "rock", "product": "Product A"} -{"index":{"_id":1}} -{"genre": "rock"} -{"index":{"_id":2}} -{"genre": "rock"} -{"index":{"_id":3}} -{"genre": "jazz", "product": "Product Z"} -{"index":{"_id":4}} -{"genre": "jazz"} -{"index":{"_id":5}} -{"genre": "electronic"} -{"index":{"_id":6}} -{"genre": "electronic"} -{"index":{"_id":7}} -{"genre": "electronic"} -{"index":{"_id":8}} -{"genre": "electronic"} -{"index":{"_id":9}} -{"genre": "electronic"} -{"index":{"_id":10}} -{"genre": "swing"} - -------------------------------------------------- -// NOTCONSOLE -// TESTSETUP - -////////////////////////// - -==== Syntax - -A `rare_terms` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "rare_terms": { - "field": "the_field", - "max_doc_count": 1 - } -} --------------------------------------------------- -// NOTCONSOLE - -.`rare_terms` Parameters -|=== -|Parameter Name |Description |Required |Default Value -|`field` |The field we wish to find rare terms in |Required | -|`max_doc_count` |The maximum number of documents a term should appear in. 
|Optional |`1` -|`precision` |The precision of the internal CuckooFilters. Smaller precision leads to -better approximation, but higher memory usage. Cannot be smaller than `0.00001` |Optional |`0.001` -|`include` |Terms that should be included in the aggregation|Optional | -|`exclude` |Terms that should be excluded from the aggregation|Optional | -|`missing` |The value that should be used if a document does not have the field being aggregated|Optional | -|=== - - -Example: - -[source,console,id=rare-terms-aggregation-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "genres": { - "rare_terms": { - "field": "genre" - } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search\?filter_path=aggregations/] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "genres": { - "buckets": [ - { - "key": "swing", - "doc_count": 1 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] - -In this example, the only bucket that we see is the "swing" bucket, because it is the only term that appears in -one document. If we increase the `max_doc_count` to `2`, we'll see some more buckets: - -[source,console,id=rare-terms-aggregation-max-doc-count-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "genres": { - "rare_terms": { - "field": "genre", - "max_doc_count": 2 - } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search\?filter_path=aggregations/] - -This now shows the "jazz" term which has a `doc_count` of 2": - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "genres": { - "buckets": [ - { - "key": "swing", - "doc_count": 1 - }, - { - "key": "jazz", - "doc_count": 2 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] - -[[search-aggregations-bucket-rare-terms-aggregation-max-doc-count]] -==== Maximum document count - -The `max_doc_count` parameter is used to control the upper bound of document counts that a term can have. There -is not a size limitation on the `rare_terms` agg like `terms` agg has. This means that terms -which match the `max_doc_count` criteria will be returned. The aggregation functions in this manner to avoid -the order-by-ascending issues that afflict the `terms` aggregation. - -This does, however, mean that a large number of results can be returned if chosen incorrectly. -To limit the danger of this setting, the maximum `max_doc_count` is 100. - -[[search-aggregations-bucket-rare-terms-aggregation-max-buckets]] -==== Max Bucket Limit - -The Rare Terms aggregation is more liable to trip the `search.max_buckets` soft limit than other aggregations due -to how it works. The `max_bucket` soft-limit is evaluated on a per-shard basis while the aggregation is collecting -results. It is possible for a term to be "rare" on a shard but become "not rare" once all the shard results are -merged together. This means that individual shards tend to collect more buckets than are truly rare, because -they only have their own local view. This list is ultimately pruned to the correct, smaller list of rare -terms on the coordinating node... but a shard may have already tripped the `max_buckets` soft limit and aborted -the request. 
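A sketch of raising the soft limit through the dynamic `search.max_buckets` cluster setting (the value shown is arbitrary and purely illustrative):

[source,js]
--------------------------------------------------
PUT /_cluster/settings
{
  "persistent": {
    "search.max_buckets": 100000
  }
}
--------------------------------------------------
// NOTCONSOLE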
- -When aggregating on fields that have potentially many "rare" terms, you may need to increase the `max_buckets` soft -limit. Alternatively, you might need to find a way to filter the results to return fewer rare values (smaller time -span, filter by category, etc), or re-evaluate your definition of "rare" (e.g. if something -appears 100,000 times, is it truly "rare"?) - -[[search-aggregations-bucket-rare-terms-aggregation-approximate-counts]] -==== Document counts are approximate - -The naive way to determine the "rare" terms in a dataset is to place all the values in a map, incrementing counts -as each document is visited, then return the bottom `n` rows. This does not scale beyond even modestly sized data -sets. A sharded approach where only the "top n" values are retained from each shard (ala the `terms` aggregation) -fails because the long-tail nature of the problem means it is impossible to find the "top n" bottom values without -simply collecting all the values from all shards. - -Instead, the Rare Terms aggregation uses a different approximate algorithm: - -1. Values are placed in a map the first time they are seen. -2. Each addition occurrence of the term increments a counter in the map -3. If the counter > the `max_doc_count` threshold, the term is removed from the map and placed in a -https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf[CuckooFilter] -4. The CuckooFilter is consulted on each term. If the value is inside the filter, it is known to be above the -threshold already and skipped. - -After execution, the map of values is the map of "rare" terms under the `max_doc_count` threshold. This map and CuckooFilter -are then merged with all other shards. If there are terms that are greater than the threshold (or appear in -a different shard's CuckooFilter) the term is removed from the merged list. The final map of values is returned -to the user as the "rare" terms. - -CuckooFilters have the possibility of returning false positives (they can say a value exists in their collection when -it actually does not). Since the CuckooFilter is being used to see if a term is over threshold, this means a false positive -from the CuckooFilter will mistakenly say a value is common when it is not (and thus exclude it from it final list of buckets). -Practically, this means the aggregations exhibits false-negative behavior since the filter is being used "in reverse" -of how people generally think of approximate set membership sketches. - -CuckooFilters are described in more detail in the paper: - -https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf[Fan, Bin, et al. "Cuckoo filter: Practically better than bloom."] -Proceedings of the 10th ACM International on Conference on emerging Networking Experiments and Technologies. ACM, 2014. - -==== Precision - -Although the internal CuckooFilter is approximate in nature, the false-negative rate can be controlled with a -`precision` parameter. This allows the user to trade more runtime memory for more accurate results. - -The default precision is `0.001`, and the smallest (e.g. most accurate and largest memory overhead) is `0.00001`. -Below are some charts which demonstrate how the accuracy of the aggregation is affected by precision and number -of distinct terms. - -The X-axis shows the number of distinct values the aggregation has seen, and the Y-axis shows the percent error. -Each line series represents one "rarity" condition (ranging from one rare item to 100,000 rare items). 
For example, -the orange "10" line means ten of the values were "rare" (`doc_count == 1`), out of 1-20m distinct values (where the -rest of the values had `doc_count > 1`) - -This first chart shows precision `0.01`: - -image:images/rare_terms/accuracy_01.png[] - -And precision `0.001` (the default): - -image:images/rare_terms/accuracy_001.png[] - -And finally `precision 0.0001`: - -image:images/rare_terms/accuracy_0001.png[] - -The default precision of `0.001` maintains an accuracy of < 2.5% for the tested conditions, and accuracy slowly -degrades in a controlled, linear fashion as the number of distinct values increases. - -The default precision of `0.001` has a memory profile of `1.748⁻⁶ * n` bytes, where `n` is the number -of distinct values the aggregation has seen (it can also be roughly eyeballed, e.g. 20 million unique values is about -30mb of memory). The memory usage is linear to the number of distinct values regardless of which precision is chosen, -the precision only affects the slope of the memory profile as seen in this chart: - -image:images/rare_terms/memory.png[] - -For comparison, an equivalent terms aggregation at 20 million buckets would be roughly -`20m * 69b == ~1.38gb` (with 69 bytes being a very optimistic estimate of an empty bucket cost, far lower than what -the circuit breaker accounts for). So although the `rare_terms` agg is relatively heavy, it is still orders of -magnitude smaller than the equivalent terms aggregation - -==== Filtering Values - -It is possible to filter the values for which buckets will be created. This can be done using the `include` and -`exclude` parameters which are based on regular expression strings or arrays of exact values. Additionally, -`include` clauses can filter using `partition` expressions. - -===== Filtering Values with regular expressions - -[source,console,id=rare-terms-aggregation-regex-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "genres": { - "rare_terms": { - "field": "genre", - "include": "swi*", - "exclude": "electro*" - } - } - } -} --------------------------------------------------- - -In the above example, buckets will be created for all the tags that starts with `swi`, except those starting -with `electro` (so the tag `swing` will be aggregated but not `electro_swing`). The `include` regular expression will determine what -values are "allowed" to be aggregated, while the `exclude` determines the values that should not be aggregated. When -both are defined, the `exclude` has precedence, meaning, the `include` is evaluated first and only then the `exclude`. - -The syntax is the same as <>. - -===== Filtering Values with exact values - -For matching based on exact values the `include` and `exclude` parameters can simply take an array of -strings that represent the terms as they are found in the index: - -[source,console,id=rare-terms-aggregation-exact-value-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "genres": { - "rare_terms": { - "field": "genre", - "include": [ "swing", "rock" ], - "exclude": [ "jazz" ] - } - } - } -} --------------------------------------------------- - - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. -By default they will be ignored but it is also possible to treat them as if they -had a value. 
- -[source,console,id=rare-terms-aggregation-missing-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "genres": { - "rare_terms": { - "field": "genre", - "missing": "N/A" <1> - } - } - } -} --------------------------------------------------- - -<1> Documents without a value in the `tags` field will fall into the same bucket as documents that have the value `N/A`. - -==== Nested, RareTerms, and scoring sub-aggregations - -The RareTerms aggregation has to operate in `breadth_first` mode, since it needs to prune terms as doc count thresholds -are breached. This requirement means the RareTerms aggregation is incompatible with certain combinations of aggregations -that require `depth_first`. In particular, scoring sub-aggregations that are inside a `nested` force the entire aggregation tree to run -in `depth_first` mode. This will throw an exception since RareTerms is unable to process `depth_first`. - -As a concrete example, if `rare_terms` aggregation is the child of a `nested` aggregation, and one of the child aggregations of `rare_terms` -needs document scores (like a `top_hits` aggregation), this will throw an exception. diff --git a/docs/reference/aggregations/bucket/reverse-nested-aggregation.asciidoc b/docs/reference/aggregations/bucket/reverse-nested-aggregation.asciidoc deleted file mode 100644 index b59d7c656bdfe..0000000000000 --- a/docs/reference/aggregations/bucket/reverse-nested-aggregation.asciidoc +++ /dev/null @@ -1,139 +0,0 @@ -[[search-aggregations-bucket-reverse-nested-aggregation]] -=== Reverse nested aggregation -++++ -Reverse nested -++++ - -A special single bucket aggregation that enables aggregating on parent docs from nested documents. Effectively this -aggregation can break out of the nested block structure and link to other nested structures or the root document, -which allows nesting other aggregations that aren't part of the nested object in a nested aggregation. - -The `reverse_nested` aggregation must be defined inside a `nested` aggregation. - -.Options: -* `path` - Which defines to what nested object field should be joined back. The default is empty, -which means that it joins back to the root / main document level. The path cannot contain a reference to -a nested object field that falls outside the `nested` aggregation's nested structure a `reverse_nested` is in. - -For example, lets say we have an index for a ticket system with issues and comments. The comments are inlined into -the issue documents as nested documents. The mapping could look like: - -[source,console,id=reversed-nested-aggregation-example] --------------------------------------------------- -PUT /issues -{ - "mappings": { - "properties": { - "tags": { "type": "keyword" }, - "comments": { <1> - "type": "nested", - "properties": { - "username": { "type": "keyword" }, - "comment": { "type": "text" } - } - } - } - } -} --------------------------------------------------- - -<1> The `comments` is an array that holds nested documents under the `issue` object. 
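For illustration, a document matching this mapping might look like the following (the field values are hypothetical):

[source,js]
--------------------------------------------------
PUT /issues/_doc/1?refresh
{
  "tags": [ "bug", "windows" ],
  "comments": [
    { "username": "john_smith", "comment": "Seeing the same problem after the upgrade." },
    { "username": "jane_doe", "comment": "A restart fixed it for me." }
  ]
}
--------------------------------------------------
// NOTCONSOLE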
- -The following aggregations will return the top commenters' username that have commented and per top commenter the top -tags of the issues the user has commented on: - -////////////////////////// - -[source,console] --------------------------------------------------- -POST /issues/_doc/0?refresh -{"tags": ["tag_1"], "comments": [{"username": "username_1"}]} --------------------------------------------------- -// TEST[continued] - -////////////////////////// - -[source,console] --------------------------------------------------- -GET /issues/_search -{ - "query": { - "match_all": {} - }, - "aggs": { - "comments": { - "nested": { - "path": "comments" - }, - "aggs": { - "top_usernames": { - "terms": { - "field": "comments.username" - }, - "aggs": { - "comment_to_issue": { - "reverse_nested": {}, <1> - "aggs": { - "top_tags_per_comment": { - "terms": { - "field": "tags" - } - } - } - } - } - } - } - } - } -} --------------------------------------------------- -// TEST[continued] -// TEST[s/_search/_search\?filter_path=aggregations/] - -As you can see above, the `reverse_nested` aggregation is put in to a `nested` aggregation as this is the only place -in the dsl where the `reverse_nested` aggregation can be used. Its sole purpose is to join back to a parent doc higher -up in the nested structure. - -<1> A `reverse_nested` aggregation that joins back to the root / main document level, because no `path` has been defined. -Via the `path` option the `reverse_nested` aggregation can join back to a different level, if multiple layered nested -object types have been defined in the mapping - -Possible response snippet: - -[source,console-result] --------------------------------------------------- -{ - "aggregations": { - "comments": { - "doc_count": 1, - "top_usernames": { - "doc_count_error_upper_bound" : 0, - "sum_other_doc_count" : 0, - "buckets": [ - { - "key": "username_1", - "doc_count": 1, - "comment_to_issue": { - "doc_count": 1, - "top_tags_per_comment": { - "doc_count_error_upper_bound" : 0, - "sum_other_doc_count" : 0, - "buckets": [ - { - "key": "tag_1", - "doc_count": 1 - } - ... - ] - } - } - } - ... - ] - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] diff --git a/docs/reference/aggregations/bucket/sampler-aggregation.asciidoc b/docs/reference/aggregations/bucket/sampler-aggregation.asciidoc deleted file mode 100644 index 24dcfcb7a42db..0000000000000 --- a/docs/reference/aggregations/bucket/sampler-aggregation.asciidoc +++ /dev/null @@ -1,163 +0,0 @@ -[[search-aggregations-bucket-sampler-aggregation]] -=== Sampler aggregation -++++ -Sampler -++++ - -A filtering aggregation used to limit any sub aggregations' processing to a sample of the top-scoring documents. - -.Example use cases: -* Tightening the focus of analytics to high-relevance matches rather than the potentially very long tail of low-quality matches -* Reducing the running cost of aggregations that can produce useful results using only samples e.g. `significant_terms` - - -Example: - -A query on StackOverflow data for the popular term `javascript` OR the rarer term -`kibana` will match many documents - most of them missing the word Kibana. To focus -the `significant_terms` aggregation on top-scoring documents that are more likely to match -the most interesting parts of our query we use a sample. 
- -[source,console,id=sampler-aggregation-example] --------------------------------------------------- -POST /stackoverflow/_search?size=0 -{ - "query": { - "query_string": { - "query": "tags:kibana OR tags:javascript" - } - }, - "aggs": { - "sample": { - "sampler": { - "shard_size": 200 - }, - "aggs": { - "keywords": { - "significant_terms": { - "field": "tags", - "exclude": [ "kibana", "javascript" ] - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:stackoverflow] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "sample": { - "doc_count": 200, <1> - "keywords": { - "doc_count": 200, - "bg_count": 650, - "buckets": [ - { - "key": "elasticsearch", - "doc_count": 150, - "score": 1.078125, - "bg_count": 200 - }, - { - "key": "logstash", - "doc_count": 50, - "score": 0.5625, - "bg_count": 50 - } - ] - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -<1> 200 documents were sampled in total. The cost of performing the nested significant_terms aggregation was -therefore limited rather than unbounded. - - -Without the `sampler` aggregation the request query considers the full "long tail" of low-quality matches and therefore identifies -less significant terms such as `jquery` and `angular` rather than focusing on the more insightful Kibana-related terms. - - -[source,console,id=sampler-aggregation-no-sampler-example] --------------------------------------------------- -POST /stackoverflow/_search?size=0 -{ - "query": { - "query_string": { - "query": "tags:kibana OR tags:javascript" - } - }, - "aggs": { - "low_quality_keywords": { - "significant_terms": { - "field": "tags", - "size": 3, - "exclude": [ "kibana", "javascript" ] - } - } - } -} --------------------------------------------------- -// TEST[setup:stackoverflow] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "low_quality_keywords": { - "doc_count": 600, - "bg_count": 650, - "buckets": [ - { - "key": "angular", - "doc_count": 200, - "score": 0.02777, - "bg_count": 200 - }, - { - "key": "jquery", - "doc_count": 200, - "score": 0.02777, - "bg_count": 200 - }, - { - "key": "logstash", - "doc_count": 50, - "score": 0.0069, - "bg_count": 50 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -// TESTRESPONSE[s/0.02777/$body.aggregations.low_quality_keywords.buckets.0.score/] -// TESTRESPONSE[s/0.0069/$body.aggregations.low_quality_keywords.buckets.2.score/] - - - -==== shard_size - -The `shard_size` parameter limits how many top-scoring documents are collected in the sample processed on each shard. -The default value is 100. - -==== Limitations - -[[sampler-breadth-first-nested-agg]] -===== Cannot be nested under `breadth_first` aggregations -Being a quality-based filter the sampler aggregation needs access to the relevance score produced for each document. -It therefore cannot be nested under a `terms` aggregation which has the `collect_mode` switched from the default `depth_first` mode to `breadth_first` as this discards scores. -In this situation an error will be thrown. 
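For illustration, a request shaped like the following would be rejected, because the `sampler` is nested under a `terms` aggregation that collects in `breadth_first` mode and therefore has no scores to pass down (field names and sizes here are hypothetical):

[source,js]
--------------------------------------------------
{
  "aggs": {
    "tags": {
      "terms": {
        "field": "tags",
        "collect_mode": "breadth_first"
      },
      "aggs": {
        "sample": {
          "sampler": { "shard_size": 100 }
        }
      }
    }
  }
}
--------------------------------------------------
// NOTCONSOLE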
\ No newline at end of file diff --git a/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc b/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc deleted file mode 100644 index 003bdd047113e..0000000000000 --- a/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc +++ /dev/null @@ -1,676 +0,0 @@ -[[search-aggregations-bucket-significantterms-aggregation]] -=== Significant terms aggregation -++++ -Significant terms -++++ - -An aggregation that returns interesting or unusual occurrences of terms in a set. - -.Example use cases: -* Suggesting "H5N1" when users search for "bird flu" in text -* Identifying the merchant that is the "common point of compromise" from the transaction history of credit card owners reporting loss -* Suggesting keywords relating to stock symbol $ATI for an automated news classifier -* Spotting the fraudulent doctor who is diagnosing more than their fair share of whiplash injuries -* Spotting the tire manufacturer who has a disproportionate number of blow-outs - -In all these cases the terms being selected are not simply the most popular terms in a set. -They are the terms that have undergone a significant change in popularity measured between a _foreground_ and _background_ set. -If the term "H5N1" only exists in 5 documents in a 10 million document index and yet is found in 4 of the 100 documents that make up a user's search results -that is significant and probably very relevant to their search. 5/10,000,000 vs 4/100 is a big swing in frequency. - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /reports -{ - "mappings": { - "properties": { - "force": { - "type": "keyword" - }, - "crime_type": { - "type": "keyword" - } - } - } -} - -POST /reports/_bulk?refresh -{"index":{"_id":0}} -{"force": "British Transport Police", "crime_type": "Bicycle theft"} -{"index":{"_id":1}} -{"force": "British Transport Police", "crime_type": "Bicycle theft"} -{"index":{"_id":2}} -{"force": "British Transport Police", "crime_type": "Bicycle theft"} -{"index":{"_id":3}} -{"force": "British Transport Police", "crime_type": "Robbery"} -{"index":{"_id":4}} -{"force": "Metropolitan Police Service", "crime_type": "Robbery"} -{"index":{"_id":5}} -{"force": "Metropolitan Police Service", "crime_type": "Bicycle theft"} -{"index":{"_id":6}} -{"force": "Metropolitan Police Service", "crime_type": "Robbery"} -{"index":{"_id":7}} -{"force": "Metropolitan Police Service", "crime_type": "Robbery"} - -------------------------------------------------- -// TESTSETUP - -////////////////////////// - -==== Single-set analysis - -In the simplest case, the _foreground_ set of interest is the search results matched by a query and the _background_ -set used for statistical comparisons is the index or indices from which the results were gathered. - -Example: - -[source,console,id=significantterms-aggregation-example] --------------------------------------------------- -GET /_search -{ - "query": { - "terms": { "force": [ "British Transport Police" ] } - }, - "aggregations": { - "significant_crime_types": { - "significant_terms": { "field": "crime_type" } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search\?filter_path=aggregations/] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "significant_crime_types": { - "doc_count": 47347, - "bg_count": 5064554, - "buckets": [ - { - "key": "Bicycle theft", - "doc_count": 3640, - "score": 0.371235374214817, - "bg_count": 66799 - } - ... - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] -// TESTRESPONSE[s/: (0\.)?[0-9]+/: $body.$_path/] - -When querying an index of all crimes from all police forces, what these results show is that the British Transport Police force -stand out as a force dealing with a disproportionately large number of bicycle thefts. Ordinarily, bicycle thefts represent only 1% of crimes (66799/5064554) -but for the British Transport Police, who handle crime on railways and stations, 7% of crimes (3640/47347) is -a bike theft. This is a significant seven-fold increase in frequency and so this anomaly was highlighted as the top crime type. - -The problem with using a query to spot anomalies is it only gives us one subset to use for comparisons. -To discover all the other police forces' anomalies we would have to repeat the query for each of the different forces. - -This can be a tedious way to look for unusual patterns in an index. - - - -==== Multi-set analysis -A simpler way to perform analysis across multiple categories is to use a parent-level aggregation to segment the data ready for analysis. - - -Example using a parent aggregation for segmentation: - -[source,console,id=significantterms-aggregation-multiset--example] --------------------------------------------------- -GET /_search -{ - "aggregations": { - "forces": { - "terms": { "field": "force" }, - "aggregations": { - "significant_crime_types": { - "significant_terms": { "field": "crime_type" } - } - } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search\?filter_path=aggregations/] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "forces": { - "doc_count_error_upper_bound": 1375, - "sum_other_doc_count": 7879845, - "buckets": [ - { - "key": "Metropolitan Police Service", - "doc_count": 894038, - "significant_crime_types": { - "doc_count": 894038, - "bg_count": 5064554, - "buckets": [ - { - "key": "Robbery", - "doc_count": 27617, - "score": 0.0599, - "bg_count": 53182 - } - ... - ] - } - }, - { - "key": "British Transport Police", - "doc_count": 47347, - "significant_crime_types": { - "doc_count": 47347, - "bg_count": 5064554, - "buckets": [ - { - "key": "Bicycle theft", - "doc_count": 3640, - "score": 0.371, - "bg_count": 66799 - } - ... - ] - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] -// TESTRESPONSE[s/: (0\.)?[0-9]+/: $body.$_path/] -// TESTRESPONSE[s/: "[^"]*"/: $body.$_path/] - -Now we have anomaly detection for each of the police forces using a single request. 
- -We can use other forms of top-level aggregations to segment our data, for example segmenting by geographic -area to identify unusual hot-spots of a particular crime type: - -[source,console,id=significantterms-aggregation-hotspot-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "hotspots": { - "geohash_grid": { - "field": "location", - "precision": 5 - }, - "aggs": { - "significant_crime_types": { - "significant_terms": { "field": "crime_type" } - } - } - } - } -} --------------------------------------------------- - -This example uses the `geohash_grid` aggregation to create result buckets that represent geographic areas, and inside each -bucket we can identify anomalous levels of a crime type in these tightly-focused areas e.g. - -* Airports exhibit unusual numbers of weapon confiscations -* Universities show uplifts of bicycle thefts - -At a higher geohash_grid zoom-level with larger coverage areas we would start to see where an entire police-force may be -tackling an unusual volume of a particular crime type. - - -Obviously a time-based top-level segmentation would help identify current trends for each point in time -where a simple `terms` aggregation would typically show the very popular "constants" that persist across all time slots. - - - -.How are the scores calculated? -********************************** -The numbers returned for scores are primarily intended for ranking different suggestions sensibly rather than something easily understood by end users. The scores are derived from the doc frequencies in _foreground_ and _background_ sets. In brief, a term is considered significant if there is a noticeable difference in the frequency in which a term appears in the subset and in the background. The way the terms are ranked can be configured, see "Parameters" section. - -********************************** - - -==== Use on free-text fields - -The significant_terms aggregation can be used effectively on tokenized free-text fields to suggest: - -* keywords for refining end-user searches -* keywords for use in percolator queries - -WARNING: Picking a free-text field as the subject of a significant terms analysis can be expensive! It will attempt -to load every unique word into RAM. It is recommended to only use this on smaller indices. - -.Use the _"like this but not this"_ pattern -********************************** -You can spot mis-categorized content by first searching a structured field e.g. `category:adultMovie` and use significant_terms on the -free-text "movie_description" field. Take the suggested words (I'll leave them to your imagination) and then search for all movies NOT marked as category:adultMovie but containing these keywords. -You now have a ranked list of badly-categorized movies that you should reclassify or at least remove from the "familyFriendly" category. - -The significance score from each term can also provide a useful `boost` setting to sort matches. -Using the `minimum_should_match` setting of the `terms` query with the keywords will help control the balance of precision/recall in the result set i.e -a high setting would have a small number of relevant results packed full of keywords and a setting of "1" would produce a more exhaustive results set with all documents containing _any_ keyword. - -********************************** - -[TIP] -============ -.Show significant_terms in context - -Free-text significant_terms are much more easily understood when viewed in context. 
Take the results of `significant_terms` suggestions from a -free-text field and use them in a `terms` query on the same field with a `highlight` clause to present users with example snippets of documents. When the terms -are presented unstemmed, highlighted, with the right case, in the right order and with some context, their significance/meaning is more readily apparent. -============ - -==== Custom background sets - -Ordinarily, the foreground set of documents is "diffed" against a background set of all the documents in your index. -However, sometimes it may prove useful to use a narrower background set as the basis for comparisons. -For example, a query on documents relating to "Madrid" in an index with content from all over the world might reveal that "Spanish" -was a significant term. This may be true but if you want some more focused terms you could use a `background_filter` -on the term 'spain' to establish a narrower set of documents as context. With this as a background "Spanish" would now -be seen as commonplace and therefore not as significant as words like "capital" that relate more strongly with Madrid. -Note that using a background filter will slow things down - each term's background frequency must now be derived on-the-fly from filtering posting lists rather than reading the index's pre-computed count for a term. - -==== Limitations - -===== Significant terms must be indexed values -Unlike the terms aggregation it is currently not possible to use script-generated terms for counting purposes. -Because of the way the significant_terms aggregation must consider both _foreground_ and _background_ frequencies -it would be prohibitively expensive to use a script on the entire index to obtain background frequencies for comparisons. -Also DocValues are not supported as sources of term data for similar reasons. - -===== No analysis of floating point fields -Floating point fields are currently not supported as the subject of significant_terms analysis. -While integer or long fields can be used to represent concepts like bank account numbers or category numbers which -can be interesting to track, floating point fields are usually used to represent quantities of something. -As such, individual floating point terms are not useful for this form of frequency analysis. - -===== Use as a parent aggregation -If there is the equivalent of a `match_all` query or no query criteria providing a subset of the index the significant_terms aggregation should not be used as the -top-most aggregation - in this scenario the _foreground_ set is exactly the same as the _background_ set and -so there is no difference in document frequencies to observe and from which to make sensible suggestions. - -Another consideration is that the significant_terms aggregation produces many candidate results at shard level -that are only later pruned on the reducing node once all statistics from all shards are merged. As a result, -it can be inefficient and costly in terms of RAM to embed large child aggregations under a significant_terms -aggregation that later discards many candidate terms. It is advisable in these cases to perform two searches - the first to provide a rationalized list of -significant_terms and then add this shortlist of terms to a second query to go back and fetch the required child aggregations. 
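One possible shape for that two-search pattern is sketched below, using plain HTTP via the Python `requests` library. The endpoint URL and the child aggregation are illustrative assumptions; the index and fields reuse the `reports` example above:

[source,python]
--------------------------------------------------
import requests

ES = "http://localhost:9200"  # assumed endpoint

# Pass 1: a cheap request that only asks for the significant terms shortlist.
first = requests.post(f"{ES}/reports/_search", json={
    "size": 0,
    "query": {"terms": {"force": ["British Transport Police"]}},
    "aggs": {"sig": {"significant_terms": {"field": "crime_type"}}}
}).json()
shortlist = [b["key"] for b in first["aggregations"]["sig"]["buckets"]]

# Pass 2: fetch the heavier child aggregations only for the shortlisted terms,
# via a plain terms aggregation restricted with `include`.
second = requests.post(f"{ES}/reports/_search", json={
    "size": 0,
    "query": {"terms": {"force": ["British Transport Police"]}},
    "aggs": {
        "crime_types": {
            "terms": {"field": "crime_type", "include": shortlist},
            "aggs": {"examples": {"top_hits": {"size": 3}}}  # illustrative child agg
        }
    }
}).json()
--------------------------------------------------
// NOTCONSOLE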
- -===== Approximate counts -The counts of how many documents contain a term provided in results are based on summing the samples returned from each shard and -as such may be: - -* low if certain shards did not provide figures for a given term in their top sample -* high when considering the background frequency as it may count occurrences found in deleted documents - -Like most design decisions, this is the basis of a trade-off in which we have chosen to provide fast performance at the cost of some (typically small) inaccuracies. -However, the `size` and `shard size` settings covered in the next section provide tools to help control the accuracy levels. - -[[significantterms-aggregation-parameters]] -==== Parameters - -===== JLH score -The JLH score can be used as a significance score by adding the parameter - -[source,js] --------------------------------------------------- - - "jlh": { - } --------------------------------------------------- -// NOTCONSOLE - -The scores are derived from the doc frequencies in _foreground_ and _background_ sets. The _absolute_ change in popularity (foregroundPercent - backgroundPercent) would favor common terms whereas the _relative_ change in popularity (foregroundPercent/ backgroundPercent) would favor rare terms. Rare vs common is essentially a precision vs recall balance and so the absolute and relative changes are multiplied to provide a sweet spot between precision and recall. - -===== Mutual information -Mutual information as described in "Information Retrieval", Manning et al., Chapter 13.5.1 can be used as significance score by adding the parameter - -[source,js] --------------------------------------------------- - - "mutual_information": { - "include_negatives": true - } --------------------------------------------------- -// NOTCONSOLE - -Mutual information does not differentiate between terms that are descriptive for the subset or for documents outside the subset. The significant terms therefore can contain terms that appear more or less frequent in the subset than outside the subset. To filter out the terms that appear less often in the subset than in documents outside the subset, `include_negatives` can be set to `false`. - -Per default, the assumption is that the documents in the bucket are also contained in the background. If instead you defined a custom background filter that represents a different set of documents that you want to compare to, set - -[source,js] --------------------------------------------------- - -"background_is_superset": false --------------------------------------------------- -// NOTCONSOLE - -===== Chi square -Chi square as described in "Information Retrieval", Manning et al., Chapter 13.5.2 can be used as significance score by adding the parameter - -[source,js] --------------------------------------------------- - - "chi_square": { - } --------------------------------------------------- -// NOTCONSOLE -Chi square behaves like mutual information and can be configured with the same parameters `include_negatives` and `background_is_superset`. - - -===== Google normalized distance -Google normalized distance as described in https://arxiv.org/pdf/cs/0412098v3.pdf["The Google Similarity Distance", Cilibrasi and Vitanyi, 2007] can be used as significance score by adding the parameter - -[source,js] --------------------------------------------------- - - "gnd": { - } --------------------------------------------------- -// NOTCONSOLE -`gnd` also accepts the `background_is_superset` parameter. 
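As a back-of-the-envelope illustration of the JLH idea (a sketch of the description above, not the code Elasticsearch actually runs), the absolute and relative changes in popularity can be combined like this:

[source,python]
--------------------------------------------------
def jlh_like_score(subset_freq, subset_size, superset_freq, superset_size):
    fg = subset_freq / subset_size        # foreground percentage
    bg = superset_freq / superset_size    # background percentage
    if bg == 0 or fg <= bg:
        return 0.0                        # no uplift in the foreground
    absolute_change = fg - bg             # favours common terms
    relative_change = fg / bg             # favours rare terms
    return absolute_change * relative_change

# Bicycle theft example: 3640/47347 in the foreground vs 66799/5064554 overall.
print(jlh_like_score(3640, 47347, 66799, 5064554))  # ~0.371, in line with the score above
--------------------------------------------------
// NOTCONSOLE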
- -[role="xpack"] -[[p-value-score]] -===== p-value score - -The p-value is the probability of obtaining test results at least as extreme as -the results actually observed, under the assumption that the null hypothesis is -correct. The p-value is calculated assuming that the foreground set and the -background set are independent -https://en.wikipedia.org/wiki/Bernoulli_trial[Bernoulli trials], with the null -hypothesis that the probabilities are the same. - -====== Example usage - -This example calculates the p-value score for terms `user_agent.version` given -the foreground set of "ended in failure" versus "NOT ended in failure". - -`"background_is_superset": false` indicates that the background set does -not contain the counts of the foreground set as they are filtered out. - -`"normalize_above": 1000` facilitates returning consistent significance results -at various scales. `1000` indicates that term counts greater than `1000` are -scaled down by a factor of `1000/term_count`. - -[source,console] --------------------------------------------------- -GET /_search -{ - "query": { - "bool": { - "filter": [ - { - "term": { - "event.outcome": "failure" - } - }, - { - "range": { - "@timestamp": { - "gte": "2021-02-01", - "lt": "2021-02-04" - } - } - }, - { - "term": { - "service.name": { - "value": "frontend-node" - } - } - } - ] - } - }, - "aggs": { - "failure_p_value": { - "significant_terms": { - "field": "user_agent.version", - "background_filter": { - "bool": { - "must_not": [ - { - "term": { - "event.outcome": "failure" - } - } - ], - "filter": [ - { - "range": { - "@timestamp": { - "gte": "2021-02-01", - "lt": "2021-02-04" - } - } - }, - { - "term": { - "service.name": { - "value": "frontend-node" - } - } - } - ] - } - }, - "p_value": {"background_is_superset": false, "normalize_above": 1000} - } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search?size=0/] - -===== Percentage -A simple calculation of the number of documents in the foreground sample with a term divided by the number of documents in the background with the term. -By default this produces a score greater than zero and less than one. - -The benefit of this heuristic is that the scoring logic is simple to explain to anyone familiar with a "per capita" statistic. However, for fields with high cardinality there is a tendency for this heuristic to select the rarest terms such as typos that occur only once because they score 1/1 = 100%. - -It would be hard for a seasoned boxer to win a championship if the prize was awarded purely on the basis of percentage of fights won - by these rules a newcomer with only one fight under their belt would be impossible to beat. -Multiple observations are typically required to reinforce a view so it is recommended in these cases to set both `min_doc_count` and `shard_min_doc_count` to a higher value such as 10 in order to filter out the low-frequency terms that otherwise take precedence. - -[source,js] --------------------------------------------------- - - "percentage": { - } --------------------------------------------------- -// NOTCONSOLE - -===== Which one is best? - - -Roughly, `mutual_information` prefers high frequent terms even if they occur also frequently in the background. For example, in an analysis of natural language text this might lead to selection of stop words. `mutual_information` is unlikely to select very rare terms like misspellings. `gnd` prefers terms with a high co-occurrence and avoids selection of stopwords. 
It might be better suited for synonym detection. However, `gnd` has a tendency to select very rare terms that are, for example, a result of misspelling. `chi_square` and `jlh` are somewhat in-between. - -It is hard to say which one of the different heuristics will be the best choice as it depends on what the significant terms are used for (see for example http://courses.ischool.berkeley.edu/i256/f06/papers/yang97comparative.pdf[Yang and Pedersen, "A Comparative Study on Feature Selection in Text Categorization", 1997] for a study on using significant terms for feature selection for text classification). - -If none of the above measures suits your usecase than another option is to implement a custom significance measure: - -===== Scripted -Customized scores can be implemented via a script: - -[source,js] --------------------------------------------------- - - "script_heuristic": { - "script": { - "lang": "painless", - "source": "params._subset_freq/(params._superset_freq - params._subset_freq + 1)" - } - } --------------------------------------------------- -// NOTCONSOLE -Scripts can be inline (as in above example), indexed or stored on disk. For details on the options, see <>. - -Available parameters in the script are - -[horizontal] -`_subset_freq`:: Number of documents the term appears in the subset. -`_superset_freq`:: Number of documents the term appears in the superset. -`_subset_size`:: Number of documents in the subset. -`_superset_size`:: Number of documents in the superset. - -[[sig-terms-shard-size]] -===== Size & Shard Size - -The `size` parameter can be set to define how many term buckets should be returned out of the overall terms list. By -default, the node coordinating the search process will request each shard to provide its own top term buckets -and once all shards respond, it will reduce the results to the final list that will then be returned to the client. -If the number of unique terms is greater than `size`, the returned list can be slightly off and not accurate -(it could be that the term counts are slightly off and it could even be that a term that should have been in the top -size buckets was not returned). - -To ensure better accuracy a multiple of the final `size` is used as the number of terms to request from each shard -(`2 * (size * 1.5 + 10)`). To take manual control of this setting the `shard_size` parameter -can be used to control the volumes of candidate terms produced by each shard. - -Low-frequency terms can turn out to be the most interesting ones once all results are combined so the -significant_terms aggregation can produce higher-quality results when the `shard_size` parameter is set to -values significantly higher than the `size` setting. This ensures that a bigger volume of promising candidate terms are given -a consolidated review by the reducing node before the final selection. Obviously large candidate term lists -will cause extra network traffic and RAM usage so this is quality/cost trade off that needs to be balanced. If `shard_size` is set to -1 (the default) then `shard_size` will be automatically estimated based on the number of shards and the `size` parameter. - - -NOTE: `shard_size` cannot be smaller than `size` (as it doesn't make much sense). When it is, Elasticsearch will - override it and reset it to be equal to `size`. 
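For reference, the per-shard request size quoted above works out as follows (a trivial sketch of the documented multiplier only; the automatic estimate for `shard_size: -1` also considers the number of shards, as noted):

[source,python]
--------------------------------------------------
def default_candidates_per_shard(size):
    # Request more candidate terms from each shard than the final `size`,
    # so the reducing node has a larger pool to pick the true top terms from.
    return int(2 * (size * 1.5 + 10))

print(default_candidates_per_shard(10))   # 50
print(default_candidates_per_shard(100))  # 320
--------------------------------------------------
// NOTCONSOLE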
- -===== Minimum document count - -It is possible to only return terms that match more than a configured number of hits using the `min_doc_count` option: - -[source,console,id=significantterms-aggregation-min-document-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "tags": { - "significant_terms": { - "field": "tag", - "min_doc_count": 10 - } - } - } -} --------------------------------------------------- - -The above aggregation would only return tags which have been found in 10 hits or more. Default value is `3`. - - - - -Terms that score highly will be collected on a shard level and merged with the terms collected from other shards in a second step. However, the shard does not have the information about the global term frequencies available. The decision if a term is added to a candidate list depends only on the score computed on the shard using local shard frequencies, not the global frequencies of the word. The `min_doc_count` criterion is only applied after merging local terms statistics of all shards. In a way the decision to add the term as a candidate is made without being very _certain_ about if the term will actually reach the required `min_doc_count`. This might cause many (globally) high frequent terms to be missing in the final result if low frequent but high scoring terms populated the candidate lists. To avoid this, the `shard_size` parameter can be increased to allow more candidate terms on the shards. However, this increases memory consumption and network traffic. - - -[[search-aggregations-bucket-significantterms-shard-min-doc-count]] -===== `shard_min_doc_count` - -include::terms-aggregation.asciidoc[tag=min-doc-count] - -WARNING: Setting `min_doc_count` to `1` is generally not advised as it tends to return terms that - are typos or other bizarre curiosities. Finding more than one instance of a term helps - reinforce that, while still rare, the term was not the result of a one-off accident. The - default value of 3 is used to provide a minimum weight-of-evidence. - Setting `shard_min_doc_count` too high will cause significant candidate terms to be filtered out on a shard level. This value should be set much lower than `min_doc_count/#shards`. - - - -===== Custom background context - -The default source of statistical information for background term frequencies is the entire index and this -scope can be narrowed through the use of a `background_filter` to focus in on significant terms within a narrower -context: - -[source,console,id=significantterms-aggregation-custom-background-example] --------------------------------------------------- -GET /_search -{ - "query": { - "match": { - "city": "madrid" - } - }, - "aggs": { - "tags": { - "significant_terms": { - "field": "tag", - "background_filter": { - "term": { "text": "spain" } - } - } - } - } -} --------------------------------------------------- - -The above filter would help focus in on terms that were peculiar to the city of Madrid rather than revealing -terms like "Spanish" that are unusual in the full index's worldwide context but commonplace in the subset of documents containing the -word "Spain". - -WARNING: Use of background filters will slow the query as each term's postings must be filtered to determine a frequency - - -===== Filtering Values - -It is possible (although rarely required) to filter the values for which buckets will be created. 
This can be done using the `include` and -`exclude` parameters which are based on a regular expression string or arrays of exact terms. This functionality mirrors the features -described in the <> documentation. - -==== Collect mode - -To avoid memory issues, the `significant_terms` aggregation always computes child aggregations in `breadth_first` mode. -A description of the different collection modes can be found in the -<> documentation. - -==== Execution hint - -There are different mechanisms by which terms aggregations can be executed: - - - by using field values directly in order to aggregate data per-bucket (`map`) - - by using <> of the field and allocating one bucket per global ordinal (`global_ordinals`) - -Elasticsearch tries to have sensible defaults so this is something that generally doesn't need to be configured. - -`global_ordinals` is the default option for `keyword` field, it uses global ordinals to allocates buckets dynamically -so memory usage is linear to the number of values of the documents that are part of the aggregation scope. - -`map` should only be considered when very few documents match a query. Otherwise the ordinals-based execution mode -is significantly faster. By default, `map` is only used when running an aggregation on scripts, since they don't have -ordinals. - - -[source,console,id=significantterms-aggregation-execution-hint-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "tags": { - "significant_terms": { - "field": "tags", - "execution_hint": "map" <1> - } - } - } -} --------------------------------------------------- - -<1> the possible values are `map`, `global_ordinals` - -Please note that Elasticsearch will ignore this execution hint if it is not applicable. diff --git a/docs/reference/aggregations/bucket/significanttext-aggregation.asciidoc b/docs/reference/aggregations/bucket/significanttext-aggregation.asciidoc deleted file mode 100644 index 759fcb9ed1c09..0000000000000 --- a/docs/reference/aggregations/bucket/significanttext-aggregation.asciidoc +++ /dev/null @@ -1,482 +0,0 @@ -[[search-aggregations-bucket-significanttext-aggregation]] -=== Significant text aggregation -++++ -Significant text -++++ - -An aggregation that returns interesting or unusual occurrences of free-text terms in a set. -It is like the <> aggregation but differs in that: - -* It is specifically designed for use on type `text` fields -* It does not require field data or doc-values -* It re-analyzes text content on-the-fly meaning it can also filter duplicate sections of -noisy text that otherwise tend to skew statistics. - -WARNING: Re-analyzing _large_ result sets will require a lot of time and memory. It is recommended that the significant_text - aggregation is used as a child of either the <> or - <> aggregation to limit the analysis - to a _small_ selection of top-matching documents e.g. 200. This will typically improve speed, memory use and quality of - results. - -.Example use cases: -* Suggesting "H5N1" when users search for "bird flu" to help expand queries -* Suggesting keywords relating to stock symbol $ATI for use in an automated news classifier - -In these cases the words being selected are not simply the most popular terms in results. The most popular words tend to be -very boring (_and, of, the, we, I, they_ ...). -The significant words are the ones that have undergone a significant change in popularity measured between a _foreground_ and _background_ set. 
-If the term "H5N1" only exists in 5 documents in a 10 million document index and yet is found in 4 of the 100 documents that make up a user's search results -that is significant and probably very relevant to their search. 5/10,000,000 vs 4/100 is a big swing in frequency. - -==== Basic use - -In the typical use case, the _foreground_ set of interest is a selection of the top-matching search results for a query -and the _background_ set used for statistical comparisons is the index or indices from which the results were gathered. - -Example: - -[source,console,id=significanttext-aggregation-example] --------------------------------------------------- -GET news/_search -{ - "query": { - "match": { "content": "Bird flu" } - }, - "aggregations": { - "my_sample": { - "sampler": { - "shard_size": 100 - }, - "aggregations": { - "keywords": { - "significant_text": { "field": "content" } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:news] - - -Response: - -[source,console-result] --------------------------------------------------- -{ - "took": 9, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations" : { - "my_sample": { - "doc_count": 100, - "keywords" : { - "doc_count": 100, - "buckets" : [ - { - "key": "h5n1", - "doc_count": 4, - "score": 4.71235374214817, - "bg_count": 5 - } - ... - ] - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[skip:historically skipped] - -The results show that "h5n1" is one of several terms strongly associated with bird flu. -It only occurs 5 times in our index as a whole (see the `bg_count`) and yet 4 of these -were lucky enough to appear in our 100 document sample of "bird flu" results. That suggests -a significant word and one which the user can potentially add to their search. - -[[filter-duplicate-text-noisy-data]] -==== Dealing with noisy data using `filter_duplicate_text` -Free-text fields often contain a mix of original content and mechanical copies of text (cut-and-paste biographies, email reply chains, -retweets, boilerplate headers/footers, page navigation menus, sidebar news links, copyright notices, standard disclaimers, addresses). - -In real-world data these duplicate sections of text tend to feature heavily in `significant_text` results if they aren't filtered out. -Filtering near-duplicate text is a difficult task at index-time but we can cleanse the data on-the-fly at query time using the -`filter_duplicate_text` setting. - - -First let's look at an unfiltered real-world example using the https://research.signalmedia.co/newsir16/signal-dataset.html[Signal media dataset] of -a million news articles covering a wide variety of news. Here are the raw significant text results for a search for the articles -mentioning "elasticsearch": - - -[source,js] --------------------------------------------------- -{ - ... - "aggregations": { - "sample": { - "doc_count": 35, - "keywords": { - "doc_count": 35, - "buckets": [ - { - "key": "elasticsearch", - "doc_count": 35, - "score": 28570.428571428572, - "bg_count": 35 - }, - ... - { - "key": "currensee", - "doc_count": 8, - "score": 6530.383673469388, - "bg_count": 8 - }, - ... - { - "key": "pozmantier", - "doc_count": 4, - "score": 3265.191836734694, - "bg_count": 4 - }, - ... - -} --------------------------------------------------- -// NOTCONSOLE - -The uncleansed documents have thrown up some odd-looking terms that are, on the face of it, statistically -correlated with appearances of our search term "elasticsearch" e.g. 
"pozmantier". -We can drill down into examples of these documents to see why pozmantier is connected using this query: - -[source,console,id=significanttext-aggregation-pozmantier-example] --------------------------------------------------- -GET news/_search -{ - "query": { - "simple_query_string": { - "query": "+elasticsearch +pozmantier" - } - }, - "_source": [ - "title", - "source" - ], - "highlight": { - "fields": { - "content": {} - } - } -} --------------------------------------------------- -// TEST[setup:news] - -The results show a series of very similar news articles about a judging panel for a number of tech projects: - -[source,js] --------------------------------------------------- -{ - ... - "hits": { - "hits": [ - { - ... - "_source": { - "source": "Presentation Master", - "title": "T.E.N. Announces Nominees for the 2015 ISE® North America Awards" - }, - "highlight": { - "content": [ - "City of San Diego Mike Pozmantier, Program Manager, Cyber Security Division, Department of", - " Janus, Janus ElasticSearch Security Visualization Engine " - ] - } - }, - { - ... - "_source": { - "source": "RCL Advisors", - "title": "T.E.N. Announces Nominees for the 2015 ISE(R) North America Awards" - }, - "highlight": { - "content": [ - "Mike Pozmantier, Program Manager, Cyber Security Division, Department of Homeland Security S&T", - "Janus, Janus ElasticSearch Security Visualization Engine" - ] - } - }, - ... --------------------------------------------------- -// NOTCONSOLE -Mike Pozmantier was one of many judges on a panel and elasticsearch was used in one of many projects being judged. - -As is typical, this lengthy press release was cut-and-paste by a variety of news sites and consequently any rare names, numbers or -typos they contain become statistically correlated with our matching query. - -Fortunately similar documents tend to rank similarly so as part of examining the stream of top-matching documents the significant_text -aggregation can apply a filter to remove sequences of any 6 or more tokens that have already been seen. Let's try this same query now but -with the `filter_duplicate_text` setting turned on: - -[source,console,id=significanttext-aggregation-filter-duplicate-text-example] --------------------------------------------------- -GET news/_search -{ - "query": { - "match": { - "content": "elasticsearch" - } - }, - "aggs": { - "sample": { - "sampler": { - "shard_size": 100 - }, - "aggs": { - "keywords": { - "significant_text": { - "field": "content", - "filter_duplicate_text": true - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:news] - -The results from analysing our deduplicated text are obviously of higher quality to anyone familiar with the elastic stack: - -[source,js] --------------------------------------------------- -{ - ... - "aggregations": { - "sample": { - "doc_count": 35, - "keywords": { - "doc_count": 35, - "buckets": [ - { - "key": "elasticsearch", - "doc_count": 22, - "score": 11288.001166180758, - "bg_count": 35 - }, - { - "key": "logstash", - "doc_count": 3, - "score": 1836.648979591837, - "bg_count": 4 - }, - { - "key": "kibana", - "doc_count": 3, - "score": 1469.3020408163263, - "bg_count": 5 - } - ] - } - } - } -} --------------------------------------------------- -// NOTCONSOLE - -Mr Pozmantier and other one-off associations with elasticsearch no longer appear in the aggregation -results as a consequence of copy-and-paste operations or other forms of mechanical repetition. 
- -If your duplicate or near-duplicate content is identifiable via a single-value indexed field (perhaps -a hash of the article's `title` text or an `original_press_release_url` field) then it would be more -efficient to use a parent <> aggregation -to eliminate these documents from the sample set based on that single key. The less duplicate content you can feed into -the significant_text aggregation up front the better in terms of performance. - - -.How are the significance scores calculated? -********************************** -The numbers returned for scores are primarily intended for ranking different suggestions sensibly rather than something easily -understood by end users. The scores are derived from the doc frequencies in _foreground_ and _background_ sets. In brief, a -term is considered significant if there is a noticeable difference in the frequency in which a term appears in the subset and -in the background. The way the terms are ranked can be configured, see "Parameters" section. - -********************************** - -.Use the _"like this but not this"_ pattern -********************************** -You can spot mis-categorized content by first searching a structured field e.g. `category:adultMovie` and use significant_text on the -text "movie_description" field. Take the suggested words (I'll leave them to your imagination) and then search for all movies NOT marked as category:adultMovie but containing these keywords. -You now have a ranked list of badly-categorized movies that you should reclassify or at least remove from the "familyFriendly" category. - -The significance score from each term can also provide a useful `boost` setting to sort matches. -Using the `minimum_should_match` setting of the `terms` query with the keywords will help control the balance of precision/recall in the result set i.e -a high setting would have a small number of relevant results packed full of keywords and a setting of "1" would produce a more exhaustive results set with all documents containing _any_ keyword. - -********************************** - - - -==== Limitations - - -===== No support for child aggregations -The significant_text aggregation intentionally does not support the addition of child aggregations because: - -* It would come with a high memory cost -* It isn't a generally useful feature and there is a workaround for those that need it - -The volume of candidate terms is generally very high and these are pruned heavily before the final -results are returned. Supporting child aggregations would generate additional churn and be inefficient. -Clients can always take the heavily-trimmed set of results from a `significant_text` request and -make a subsequent follow-up query using a `terms` aggregation with an `include` clause and child -aggregations to perform further analysis of selected keywords in a more efficient fashion. - -===== No support for nested objects - -The significant_text aggregation currently also cannot be used with text fields in -nested objects, because it works with the document JSON source. This makes this -feature inefficient when matching nested docs from stored JSON given a matching -Lucene docID. 
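Returning to the single-value-field suggestion above, one hedged sketch of that approach (index name, field names and endpoint URL are assumptions) is to store a hash of the `title` at index time and diversify the sample on it before running `significant_text`:

[source,python]
--------------------------------------------------
import hashlib
import requests

ES = "http://localhost:9200"  # assumed endpoint

def index_article(doc_id, title, content):
    # Near-identical press releases end up sharing one cheap-to-compare key.
    doc = {
        "title": title,
        "content": content,
        "title_hash": hashlib.sha1(title.lower().encode("utf-8")).hexdigest(),
    }
    requests.put(f"{ES}/news/_doc/{doc_id}", json=doc)

# Query time: keep at most one sampled document per title hash.
query = {
    "query": {"match": {"content": "elasticsearch"}},
    "aggs": {
        "sample": {
            "diversified_sampler": {"shard_size": 100, "field": "title_hash",
                                    "max_docs_per_value": 1},
            "aggs": {"keywords": {"significant_text": {"field": "content"}}}
        }
    }
}
response = requests.post(f"{ES}/news/_search", json=query).json()
--------------------------------------------------
// NOTCONSOLE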
- -===== Approximate counts -The counts of how many documents contain a term provided in results are based on summing the samples returned from each shard and -as such may be: - -* low if certain shards did not provide figures for a given term in their top sample -* high when considering the background frequency as it may count occurrences found in deleted documents - -Like most design decisions, this is the basis of a trade-off in which we have chosen to provide fast performance at the cost of some (typically small) inaccuracies. -However, the `size` and `shard size` settings covered in the next section provide tools to help control the accuracy levels. - -[[significanttext-aggregation-parameters]] -==== Parameters - -===== Significance heuristics - -This aggregation supports the same scoring heuristics (JLH, mutual_information, gnd, chi_square etc) as the <> aggregation - -[[sig-text-shard-size]] -===== Size & Shard Size - -The `size` parameter can be set to define how many term buckets should be returned out of the overall terms list. By -default, the node coordinating the search process will request each shard to provide its own top term buckets -and once all shards respond, it will reduce the results to the final list that will then be returned to the client. -If the number of unique terms is greater than `size`, the returned list can be slightly off and not accurate -(it could be that the term counts are slightly off and it could even be that a term that should have been in the top -size buckets was not returned). - -To ensure better accuracy a multiple of the final `size` is used as the number of terms to request from each shard -(`2 * (size * 1.5 + 10)`). To take manual control of this setting the `shard_size` parameter -can be used to control the volumes of candidate terms produced by each shard. - -Low-frequency terms can turn out to be the most interesting ones once all results are combined so the -significant_terms aggregation can produce higher-quality results when the `shard_size` parameter is set to -values significantly higher than the `size` setting. This ensures that a bigger volume of promising candidate terms are given -a consolidated review by the reducing node before the final selection. Obviously large candidate term lists -will cause extra network traffic and RAM usage so this is quality/cost trade off that needs to be balanced. If `shard_size` is set to -1 (the default) then `shard_size` will be automatically estimated based on the number of shards and the `size` parameter. - - -NOTE: `shard_size` cannot be smaller than `size` (as it doesn't make much sense). When it is, elasticsearch will - override it and reset it to be equal to `size`. - -===== Minimum document count - -It is possible to only return terms that match more than a configured number of hits using the `min_doc_count` option. -The Default value is 3. - -Terms that score highly will be collected on a shard level and merged with the terms collected from other shards in a second step. -However, the shard does not have the information about the global term frequencies available. The decision if a term is added to a -candidate list depends only on the score computed on the shard using local shard frequencies, not the global frequencies of the word. -The `min_doc_count` criterion is only applied after merging local terms statistics of all shards. In a way the decision to add the -term as a candidate is made without being very _certain_ about if the term will actually reach the required `min_doc_count`. 
-This might cause many (globally) high frequent terms to be missing in the final result if low frequent but high scoring terms populated -the candidate lists. To avoid this, the `shard_size` parameter can be increased to allow more candidate terms on the shards. -However, this increases memory consumption and network traffic. - -[[search-aggregations-bucket-significanttext-shard-min-doc-count]] -====== `shard_min_doc_count` - -include::terms-aggregation.asciidoc[tag=min-doc-count] - -WARNING: Setting `min_doc_count` to `1` is generally not advised as it tends to return terms that - are typos or other bizarre curiosities. Finding more than one instance of a term helps - reinforce that, while still rare, the term was not the result of a one-off accident. The - default value of 3 is used to provide a minimum weight-of-evidence. - Setting `shard_min_doc_count` too high will cause significant candidate terms to be filtered out on a shard level. - This value should be set much lower than `min_doc_count/#shards`. - - - -===== Custom background context - -The default source of statistical information for background term frequencies is the entire index and this -scope can be narrowed through the use of a `background_filter` to focus in on significant terms within a narrower -context: - -[source,console,id=significanttext-aggregation-custom-background-example] --------------------------------------------------- -GET news/_search -{ - "query": { - "match": { - "content": "madrid" - } - }, - "aggs": { - "tags": { - "significant_text": { - "field": "content", - "background_filter": { - "term": { "content": "spain" } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:news] - -The above filter would help focus in on terms that were peculiar to the city of Madrid rather than revealing -terms like "Spanish" that are unusual in the full index's worldwide context but commonplace in the subset of documents containing the -word "Spain". - -WARNING: Use of background filters will slow the query as each term's postings must be filtered to determine a frequency - - -===== Dealing with source and index mappings - -Ordinarily the indexed field name and the original JSON field being retrieved share the same name. -However with more complex field mappings using features like `copy_to` the source -JSON field(s) and the indexed field being aggregated can differ. -In these cases it is possible to list the JSON _source fields from which text -will be analyzed using the `source_fields` parameter: - -[source,console,id=significanttext-aggregation-mappings-example] --------------------------------------------------- -GET news/_search -{ - "query": { - "match": { - "custom_all": "elasticsearch" - } - }, - "aggs": { - "tags": { - "significant_text": { - "field": "custom_all", - "source_fields": [ "content", "title" ] - } - } - } -} --------------------------------------------------- -// TEST[setup:news] - - -===== Filtering Values - -It is possible (although rarely required) to filter the values for which buckets will be created. This can be done using the `include` and -`exclude` parameters which are based on a regular expression string or arrays of exact terms. This functionality mirrors the features -described in the <> documentation. 
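To make the `copy_to` scenario above concrete, here is a hedged sketch of a mapping in which the indexed field (`custom_all`) differs from the JSON source fields it is built from (field names and endpoint URL are illustrative):

[source,python]
--------------------------------------------------
import requests

ES = "http://localhost:9200"  # assumed endpoint

# `custom_all` only exists in the index (populated via copy_to), so the
# significant_text aggregation must be told which JSON source fields to re-analyse.
mapping = {
    "mappings": {
        "properties": {
            "title":      {"type": "text", "copy_to": "custom_all"},
            "content":    {"type": "text", "copy_to": "custom_all"},
            "custom_all": {"type": "text"}
        }
    }
}
requests.put(f"{ES}/news", json=mapping)
--------------------------------------------------
// NOTCONSOLE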
- - diff --git a/docs/reference/aggregations/bucket/terms-aggregation.asciidoc b/docs/reference/aggregations/bucket/terms-aggregation.asciidoc deleted file mode 100644 index f363d71a4203a..0000000000000 --- a/docs/reference/aggregations/bucket/terms-aggregation.asciidoc +++ /dev/null @@ -1,859 +0,0 @@ -[[search-aggregations-bucket-terms-aggregation]] -=== Terms aggregation -++++ -Terms -++++ - -A multi-bucket value source based aggregation where buckets are dynamically built - one per unique value. - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /products -{ - "mappings": { - "properties": { - "genre": { - "type": "keyword" - }, - "product": { - "type": "keyword" - } - } - } -} - -POST /products/_bulk?refresh -{"index":{"_id":0}} -{"genre": "rock", "product": "Product A"} -{"index":{"_id":1}} -{"genre": "rock", "product": "Product B"} -{"index":{"_id":2}} -{"genre": "rock", "product": "Product C"} -{"index":{"_id":3}} -{"genre": "jazz", "product": "Product D"} -{"index":{"_id":4}} -{"genre": "jazz", "product": "Product E"} -{"index":{"_id":5}} -{"genre": "electronic", "product": "Anthology A"} -{"index":{"_id":6}} -{"genre": "electronic", "product": "Anthology A"} -{"index":{"_id":7}} -{"genre": "electronic", "product": "Product F"} -{"index":{"_id":8}} -{"genre": "electronic", "product": "Product G"} -{"index":{"_id":9}} -{"genre": "electronic", "product": "Product H"} -{"index":{"_id":10}} -{"genre": "electronic", "product": "Product I"} -------------------------------------------------- -// TESTSETUP - -////////////////////////// - -Example: - -[source,console,id=terms-aggregation-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "genres": { - "terms": { "field": "genre" } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search\?filter_path=aggregations/] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "genres": { - "doc_count_error_upper_bound": 0, <1> - "sum_other_doc_count": 0, <2> - "buckets": [ <3> - { - "key": "electronic", - "doc_count": 6 - }, - { - "key": "rock", - "doc_count": 3 - }, - { - "key": "jazz", - "doc_count": 2 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] - -<1> an upper bound of the error on the document counts for each term, see <> -<2> when there are lots of unique terms, Elasticsearch only returns the top terms; this number is the sum of the document counts for all buckets that are not part of the response -<3> the list of the top buckets, the meaning of `top` being defined by the <> - -[[search-aggregations-bucket-terms-aggregation-types]] -The `field` can be <>, <>, <>, <>, -or <>. - -NOTE: By default, you cannot run a `terms` aggregation on a `text` field. Use a -`keyword` <> instead. Alternatively, you can enable -<> on the `text` field to create buckets for the field's -<> terms. Enabling `fielddata` can significantly increase -memory usage. - -[[search-aggregations-bucket-terms-aggregation-size]] -==== Size - -By default, the `terms` aggregation returns the top ten terms with the most -documents. Use the `size` parameter to return more terms, up to the -<> limit. - -If your data contains 100 or 1000 unique terms, you can increase the `size` of -the `terms` aggregation to return them all. If you have more unique terms and -you need them all, use the -<> -instead. 
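A small sketch of consuming the response shown above over HTTP (endpoint URL assumed), including the two accuracy-related fields called out in the callouts:

[source,python]
--------------------------------------------------
import requests

ES = "http://localhost:9200"  # assumed endpoint

resp = requests.post(f"{ES}/products/_search", json={
    "size": 0,
    "aggs": {"genres": {"terms": {"field": "genre", "size": 10}}}
}).json()

genres = resp["aggregations"]["genres"]
for bucket in genres["buckets"]:
    print(bucket["key"], bucket["doc_count"])

# Documents whose terms did not make the top buckets, and the upper bound on
# per-term count error - worth checking before trusting the counts.
print("other docs:", genres["sum_other_doc_count"])
print("max count error:", genres["doc_count_error_upper_bound"])
--------------------------------------------------
// NOTCONSOLE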
- -Larger values of `size` use more memory to compute and, push the whole -aggregation close to the `max_buckets` limit. You'll know you've gone too large -if the request fails with a message about `max_buckets`. - -[[search-aggregations-bucket-terms-aggregation-shard-size]] -==== Shard size - -To get more accurate results, the `terms` agg fetches more than -the top `size` terms from each shard. It fetches the top `shard_size` terms, -which defaults to `size * 1.5 + 10`. - -This is to handle the case when one term has many documents on one shard but is -just below the `size` threshold on all other shards. If each shard only -returned `size` terms, the aggregation would return an partial doc count for -the term. So `terms` returns more terms in an attempt to catch the missing -terms. This helps, but it's still quite possible to return a partial doc -count for a term. It just takes a term with more disparate per-shard doc counts. - -You can increase `shard_size` to better account for these disparate doc counts -and improve the accuracy of the selection of top terms. It is much cheaper to increase -the `shard_size` than to increase the `size`. However, it still takes more -bytes over the wire and waiting in memory on the coordinating node. - -IMPORTANT: This guidance only applies if you're using the `terms` aggregation's -default sort `order`. If you're sorting by anything other than document count in -descending order, see <>. - -NOTE: `shard_size` cannot be smaller than `size` (as it doesn't make much sense). When it is, Elasticsearch will - override it and reset it to be equal to `size`. - -[[terms-agg-doc-count-error]] -==== Document count error - -Even with a larger `shard_size` value, `doc_count` values for a `terms` -aggregation may be approximate. As a result, any sub-aggregations on the `terms` -aggregation may also be approximate. - -`sum_other_doc_count` is the number of documents that didn't make it into the -the top `size` terms. If this is greater than `0`, you can be sure that the -`terms` agg had to throw away some buckets, either because they didn't fit into -`size` on the coordinating node or they didn't fit into `shard_size` on the -data node. - -==== Per bucket document count error - -If you set the `show_term_doc_count_error` parameter to `true`, the `terms` -aggregation will include `doc_count_error_upper_bound`, which is an upper bound -to the error on the `doc_count` returned by each shard. It's the -sum of the size of the largest bucket on each shard that didn't fit into -`shard_size`. - -In more concrete terms, imagine there is one bucket that is very large on one -shard and just outside the `shard_size` on all the other shards. In that case, -the `terms` agg will return the bucket because it is large, but it'll be missing -data from many documents on the shards where the term fell below the `shard_size` threshold. -`doc_count_error_upper_bound` is the maximum number of those missing documents. - -[source,console,id=terms-aggregation-doc-count-error-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "products": { - "terms": { - "field": "product", - "size": 5, - "show_term_doc_count_error": true - } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search\?filter_path=aggregations/] - - -These errors can only be calculated in this way when the terms are ordered by descending document count. 
When the aggregation is -ordered by the terms values themselves (either ascending or descending) there is no error in the document count since if a shard -does not return a particular term which appears in the results from another shard, it must not have that term in its index. When the -aggregation is either sorted by a sub aggregation or in order of ascending document count, the error in the document counts cannot be -determined and is given a value of -1 to indicate this. - -[[search-aggregations-bucket-terms-aggregation-order]] -==== Order - -By default, the `terms` aggregation orders terms by descending document -`_count`. This produces a bounded <> -error that {es} can report. - -You can use the `order` parameter to specify a different sort order, but we -don't recommend it. It is extremely easy to create a terms ordering that will -just return wrong results, and not obvious to see when you have done so. -Change this only with caution. - -WARNING: Especially avoid using `"order": { "_count": "asc" }`. If you need to find rare -terms, use the -<> aggregation -instead. Due to the way the `terms` aggregation -<>, sorting by ascending doc count often produces inaccurate results. - - -===== Ordering by the term value -In this case, the buckets are ordered by the actual term values, such as -lexicographic order for keywords or numerically for numbers. This sorting is -safe in both ascending and descending directions, and produces accurate -results. - -Example of ordering the buckets alphabetically by their terms in an ascending manner: - -[source,console,id=terms-aggregation-asc-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "genres": { - "terms": { - "field": "genre", - "order": { "_key": "asc" } - } - } - } -} --------------------------------------------------- - -===== Ordering by a sub aggregation - -WARNING: Sorting by a sub aggregation generally produces incorrect ordering, due to the way the `terms` aggregation -<>. - -There are two cases when sub-aggregation ordering is safe and returns correct -results: sorting by a maximum in descending order, or sorting by a minimum in -ascending order. These approaches work because they align with the behavior of -sub aggregations. That is, if you're looking for the largest maximum or the -smallest minimum, the global answer (from combined shards) must be included in -one of the local shard answers. Conversely, the smallest maximum and largest -minimum wouldn't be accurately computed. - -Note also that in these cases, the ordering is correct but the doc counts and -non-ordering sub aggregations may still have errors (and {es} does not calculate a -bound for those errors). 
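A toy, in-memory illustration of why the max-descending case is safe while the reverse is not (two imaginary shards, no Elasticsearch involved):

[source,python]
--------------------------------------------------
# play_count values for one term bucket, as seen by two imaginary shards.
shard_a = [12, 55, 7]
shard_b = [40, 3]

# Each shard reports its local maximum; the reduce step takes the max of those,
# so the global maximum is always recoverable and "order by max desc" is safe.
local_maxes = [max(shard_a), max(shard_b)]            # [55, 40]
assert max(local_maxes) == max(shard_a + shard_b)     # 55 == 55

# The smallest maximum is not recoverable the same way: if a shard trims this
# bucket from its local top list, its contribution never reaches the
# coordinating node, which is why ascending orderings are unreliable.
--------------------------------------------------
// NOTCONSOLE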
- -Ordering the buckets by single value metrics sub-aggregation (identified by the aggregation name): - -[source,console,id=terms-aggregation-subaggregation-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "genres": { - "terms": { - "field": "genre", - "order": { "max_play_count": "desc" } - }, - "aggs": { - "max_play_count": { "max": { "field": "play_count" } } - } - } - } -} --------------------------------------------------- - -Ordering the buckets by multi value metrics sub-aggregation (identified by the aggregation name): - -[source,console,id=terms-aggregation-multivalue-subaggregation-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "genres": { - "terms": { - "field": "genre", - "order": { "playback_stats.max": "desc" } - }, - "aggs": { - "playback_stats": { "stats": { "field": "play_count" } } - } - } - } -} --------------------------------------------------- - -[NOTE] -.Pipeline aggs cannot be used for sorting -======================================= - -<> are run during the -reduce phase after all other aggregations have already completed. For this -reason, they cannot be used for ordering. - -======================================= - -It is also possible to order the buckets based on a "deeper" aggregation in the hierarchy. This is supported as long -as the aggregations path are of a single-bucket type, where the last aggregation in the path may either be a single-bucket -one or a metrics one. If it's a single-bucket type, the order will be defined by the number of docs in the bucket (i.e. `doc_count`), -in case it's a metrics one, the same rules as above apply (where the path must indicate the metric name to sort by in case of -a multi-value metrics aggregation, and in case of a single-value metrics aggregation the sort will be applied on that value). - -The path must be defined in the following form: - -// {wikipedia}/Extended_Backus%E2%80%93Naur_Form -[source,ebnf] --------------------------------------------------- -AGG_SEPARATOR = '>' ; -METRIC_SEPARATOR = '.' ; -AGG_NAME = ; -METRIC = ; -PATH = [ , ]* [ , ] ; --------------------------------------------------- - -[source,console,id=terms-aggregation-hierarchy-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "countries": { - "terms": { - "field": "artist.country", - "order": { "rock>playback_stats.avg": "desc" } - }, - "aggs": { - "rock": { - "filter": { "term": { "genre": "rock" } }, - "aggs": { - "playback_stats": { "stats": { "field": "play_count" } } - } - } - } - } - } -} --------------------------------------------------- - -The above will sort the artist's countries buckets based on the average play count among the rock songs. - -Multiple criteria can be used to order the buckets by providing an array of order criteria such as the following: - -[source,console,id=terms-aggregation-multicriteria-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "countries": { - "terms": { - "field": "artist.country", - "order": [ { "rock>playback_stats.avg": "desc" }, { "_count": "desc" } ] - }, - "aggs": { - "rock": { - "filter": { "term": { "genre": "rock" } }, - "aggs": { - "playback_stats": { "stats": { "field": "play_count" } } - } - } - } - } - } -} --------------------------------------------------- - -The above will sort the artist's countries buckets based on the average play count among the rock songs and then by -their `doc_count` in descending order. 
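For intuition, the same multi-criteria ordering applied to already-reduced buckets looks roughly like this (values are made up; the final tie-breaker is described in the note below):

[source,python]
--------------------------------------------------
buckets = [
    {"key": "UK",     "doc_count": 914,  "rock_avg_play_count": 321.5},
    {"key": "Sweden", "doc_count": 145,  "rock_avg_play_count": 321.5},
    {"key": "US",     "doc_count": 1203, "rock_avg_play_count": 287.0},
]
# Same ordering as the request above: avg play count desc, then doc_count desc,
# with the term itself (ascending) as the final tie-breaker.
buckets.sort(key=lambda b: (-b["rock_avg_play_count"], -b["doc_count"], b["key"]))
print([b["key"] for b in buckets])  # ['UK', 'Sweden', 'US']
--------------------------------------------------
// NOTCONSOLE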
- -NOTE: In the event that two buckets share the same values for all order criteria the bucket's term value is used as a -tie-breaker in ascending alphabetical order to prevent non-deterministic ordering of buckets. - -===== Ordering by count ascending - -Ordering terms by ascending document `_count` produces an unbounded error that -{es} can't accurately report. We therefore strongly recommend against using -`"order": { "_count": "asc" }` as shown in the following example: - -[source,console,id=terms-aggregation-count-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "genres": { - "terms": { - "field": "genre", - "order": { "_count": "asc" } - } - } - } -} --------------------------------------------------- - -==== Minimum document count - -It is possible to only return terms that match more than a configured number of hits using the `min_doc_count` option: - -[source,console,id=terms-aggregation-min-doc-count-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "tags": { - "terms": { - "field": "tags", - "min_doc_count": 10 - } - } - } -} --------------------------------------------------- - -The above aggregation would only return tags which have been found in 10 hits or more. Default value is `1`. - - -Terms are collected and ordered on a shard level and merged with the terms collected from other shards in a second step. However, the shard does not have the information about the global document count available. The decision if a term is added to a candidate list depends only on the order computed on the shard using local shard frequencies. The `min_doc_count` criterion is only applied after merging local terms statistics of all shards. In a way the decision to add the term as a candidate is made without being very _certain_ about if the term will actually reach the required `min_doc_count`. This might cause many (globally) high frequent terms to be missing in the final result if low frequent terms populated the candidate lists. To avoid this, the `shard_size` parameter can be increased to allow more candidate terms on the shards. However, this increases memory consumption and network traffic. - -[[search-aggregations-bucket-terms-shard-min-doc-count]] -===== `shard_min_doc_count` - -// tag::min-doc-count[] -The parameter `shard_min_doc_count` regulates the _certainty_ a shard has if the term should actually be added to the candidate list or not with respect to the `min_doc_count`. Terms will only be considered if their local shard frequency within the set is higher than the `shard_min_doc_count`. If your dictionary contains many low frequent terms and you are not interested in those (for example misspellings), then you can set the `shard_min_doc_count` parameter to filter out candidate terms on a shard level that will with a reasonable certainty not reach the required `min_doc_count` even after merging the local counts. `shard_min_doc_count` is set to `0` per default and has no effect unless you explicitly set it. -// end::min-doc-count[] - - -NOTE: Setting `min_doc_count`=`0` will also return buckets for terms that didn't match any hit. However, some of - the returned terms which have a document count of zero might only belong to deleted documents or documents - from other types, so there is no warranty that a `match_all` query would find a positive document count for - those terms. 
- -WARNING: When NOT sorting on `doc_count` descending, high values of `min_doc_count` may return a number of buckets - which is less than `size` because not enough data was gathered from the shards. Missing buckets can be - back by increasing `shard_size`. - Setting `shard_min_doc_count` too high will cause terms to be filtered out on a shard level. This value should be set much lower than `min_doc_count/#shards`. - -[[search-aggregations-bucket-terms-aggregation-script]] -==== Script - -Use a <> if the data in your documents doesn't -exactly match what you'd like to aggregate. If, for example, "anthologies" -need to be in a special category then you could run this: - -[source,console,id=terms-aggregation-script-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "runtime_mappings": { - "normalized_genre": { - "type": "keyword", - "script": """ - String genre = doc['genre'].value; - if (doc['product'].value.startsWith('Anthology')) { - emit(genre + ' anthology'); - } else { - emit(genre); - } - """ - } - }, - "aggs": { - "genres": { - "terms": { - "field": "normalized_genre" - } - } - } -} --------------------------------------------------- - -Which will look like: - -[source,console-result] --------------------------------------------------- -{ - "aggregations": { - "genres": { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "electronic", - "doc_count": 4 - }, - { - "key": "rock", - "doc_count": 3 - }, - { - "key": "electronic anthology", - "doc_count": 2 - }, - { - "key": "jazz", - "doc_count": 2 - } - ] - } - }, - ... -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": "$body.took", "timed_out": false, "_shards": "$body._shards", "hits": "$body.hits"/] - -This is a little slower because the runtime field has to access two fields -instead of one and because there are some optimizations that work on -non-runtime `keyword` fields that we have to give up for for runtime -`keyword` fields. If you need the speed, you can index the -`normalized_genre` field. - -// TODO when we have calculated fields we can link to them here. - - -==== Filtering Values - -It is possible to filter the values for which buckets will be created. This can be done using the `include` and -`exclude` parameters which are based on regular expression strings or arrays of exact values. Additionally, -`include` clauses can filter using `partition` expressions. - -===== Filtering Values with regular expressions - -[source,console,id=terms-aggregation-regex-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "tags": { - "terms": { - "field": "tags", - "include": ".*sport.*", - "exclude": "water_.*" - } - } - } -} --------------------------------------------------- - -In the above example, buckets will be created for all the tags that has the word `sport` in them, except those starting -with `water_` (so the tag `water_sports` will not be aggregated). The `include` regular expression will determine what -values are "allowed" to be aggregated, while the `exclude` determines the values that should not be aggregated. When -both are defined, the `exclude` has precedence, meaning, the `include` is evaluated first and only then the `exclude`. - -The syntax is the same as <>. 
- -===== Filtering Values with exact values - -For matching based on exact values the `include` and `exclude` parameters can simply take an array of -strings that represent the terms as they are found in the index: - -[source,console,id=terms-aggregation-exact-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "JapaneseCars": { - "terms": { - "field": "make", - "include": [ "mazda", "honda" ] - } - }, - "ActiveCarManufacturers": { - "terms": { - "field": "make", - "exclude": [ "rover", "jensen" ] - } - } - } -} --------------------------------------------------- - -===== Filtering Values with partitions - -Sometimes there are too many unique terms to process in a single request/response pair so -it can be useful to break the analysis up into multiple requests. -This can be achieved by grouping the field's values into a number of partitions at query-time and processing -only one partition in each request. -Consider this request which is looking for accounts that have not logged any access recently: - -[source,console,id=terms-aggregation-partitions-example] --------------------------------------------------- -GET /_search -{ - "size": 0, - "aggs": { - "expired_sessions": { - "terms": { - "field": "account_id", - "include": { - "partition": 0, - "num_partitions": 20 - }, - "size": 10000, - "order": { - "last_access": "asc" - } - }, - "aggs": { - "last_access": { - "max": { - "field": "access_date" - } - } - } - } - } -} --------------------------------------------------- - -This request is finding the last logged access date for a subset of customer accounts because we -might want to expire some customer accounts who haven't been seen for a long while. -The `num_partitions` setting has requested that the unique account_ids are organized evenly into twenty -partitions (0 to 19). and the `partition` setting in this request filters to only consider account_ids falling -into partition 0. Subsequent requests should ask for partitions 1 then 2 etc to complete the expired-account analysis. - -Note that the `size` setting for the number of results returned needs to be tuned with the `num_partitions`. -For this particular account-expiration example the process for balancing values for `size` and `num_partitions` would be as follows: - -1. Use the `cardinality` aggregation to estimate the total number of unique account_id values -2. Pick a value for `num_partitions` to break the number from 1) up into more manageable chunks -3. Pick a `size` value for the number of responses we want from each partition -4. Run a test request - -If we have a circuit-breaker error we are trying to do too much in one request and must increase `num_partitions`. -If the request was successful but the last account ID in the date-sorted test response was still an account we might want to -expire then we may be missing accounts of interest and have set our numbers too low. We must either - -* increase the `size` parameter to return more results per partition (could be heavy on memory) or -* increase the `num_partitions` to consider less accounts per request (could increase overall processing time as we need to make more requests) - -Ultimately this is a balancing act between managing the Elasticsearch resources required to process a single request and the volume -of requests that the client application must issue to complete a task. - -WARNING: Partitions cannot be used together with an `exclude` parameter. 
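Returning to step 1 of the tuning process above, the total number of unique `account_id` values can be estimated with a `cardinality` aggregation before choosing `num_partitions`. A minimal sketch (the aggregation name is illustrative):

[source,console]
--------------------------------------------------
GET /_search
{
  "size": 0,
  "aggs": {
    "unique_accounts": {
      "cardinality": {
        "field": "account_id"
      }
    }
  }
}
--------------------------------------------------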
- -==== Multi-field terms aggregation - -The `terms` aggregation does not support collecting terms from multiple fields -in the same document. The reason is that the `terms` agg doesn't collect the -string term values themselves, but rather uses -<> -to produce a list of all of the unique values in the field. Global ordinals -results in an important performance boost which would not be possible across -multiple fields. - -There are three approaches that you can use to perform a `terms` agg across -multiple fields: - -<>:: - -Use a script to retrieve terms from multiple fields. This disables the global -ordinals optimization and will be slower than collecting terms from a single -field, but it gives you the flexibility to implement this option at search -time. - -<>:: - -If you know ahead of time that you want to collect the terms from two or more -fields, then use `copy_to` in your mapping to create a new dedicated field at -index time which contains the values from both fields. You can aggregate on -this single field, which will benefit from the global ordinals optimization. - -<>:: - -Use multi_terms aggregation to combine terms from multiple fields into a compound key. This -also disables the global ordinals and will be slower than collecting terms from a single field. -It is faster but less flexible than using a script. - -[[search-aggregations-bucket-terms-aggregation-collect]] -==== Collect mode - -Deferring calculation of child aggregations - -For fields with many unique terms and a small number of required results it can be more efficient to delay the calculation -of child aggregations until the top parent-level aggs have been pruned. Ordinarily, all branches of the aggregation tree -are expanded in one depth-first pass and only then any pruning occurs. -In some scenarios this can be very wasteful and can hit memory constraints. -An example problem scenario is querying a movie database for the 10 most popular actors and their 5 most common co-stars: - -[source,console,id=terms-aggregation-collect-mode-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "actors": { - "terms": { - "field": "actors", - "size": 10 - }, - "aggs": { - "costars": { - "terms": { - "field": "actors", - "size": 5 - } - } - } - } - } -} --------------------------------------------------- - -Even though the number of actors may be comparatively small and we want only 50 result buckets there is a combinatorial explosion of buckets -during calculation - a single actor can produce n² buckets where n is the number of actors. The sane option would be to first determine -the 10 most popular actors and only then examine the top co-stars for these 10 actors. This alternative strategy is what we call the `breadth_first` collection -mode as opposed to the `depth_first` mode. - -NOTE: The `breadth_first` is the default mode for fields with a cardinality bigger than the requested size or when the cardinality is unknown (numeric fields or scripts for instance). 
-It is possible to override the default heuristic and to provide a collect mode directly in the request: - -[source,console,id=terms-aggregation-breadth-first-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "actors": { - "terms": { - "field": "actors", - "size": 10, - "collect_mode": "breadth_first" <1> - }, - "aggs": { - "costars": { - "terms": { - "field": "actors", - "size": 5 - } - } - } - } - } -} --------------------------------------------------- - -<1> the possible values are `breadth_first` and `depth_first` - -When using `breadth_first` mode the set of documents that fall into the uppermost buckets are -cached for subsequent replay so there is a memory overhead in doing this which is linear with the number of matching documents. -Note that the `order` parameter can still be used to refer to data from a child aggregation when using the `breadth_first` setting - the parent -aggregation understands that this child aggregation will need to be called first before any of the other child aggregations. - -WARNING: Nested aggregations such as `top_hits` which require access to score information under an aggregation that uses the `breadth_first` -collection mode need to replay the query on the second pass but only for the documents belonging to the top buckets. - -[[search-aggregations-bucket-terms-aggregation-execution-hint]] -==== Execution hint - -There are different mechanisms by which terms aggregations can be executed: - - - by using field values directly in order to aggregate data per-bucket (`map`) - - by using global ordinals of the field and allocating one bucket per global ordinal (`global_ordinals`) - -Elasticsearch tries to have sensible defaults so this is something that generally doesn't need to be configured. - -`global_ordinals` is the default option for `keyword` field, it uses global ordinals to allocates buckets dynamically -so memory usage is linear to the number of values of the documents that are part of the aggregation scope. - -`map` should only be considered when very few documents match a query. Otherwise the ordinals-based execution mode -is significantly faster. By default, `map` is only used when running an aggregation on scripts, since they don't have -ordinals. - -[source,console,id=terms-aggregation-execution-hint-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "tags": { - "terms": { - "field": "tags", - "execution_hint": "map" <1> - } - } - } -} --------------------------------------------------- - -<1> The possible values are `map`, `global_ordinals` - -Please note that Elasticsearch will ignore this execution hint if it is not applicable and that there is no backward compatibility guarantee on these hints. - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. -By default they will be ignored but it is also possible to treat them as if they -had a value. - -[source,console,id=terms-aggregation-missing-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "tags": { - "terms": { - "field": "tags", - "missing": "N/A" <1> - } - } - } -} --------------------------------------------------- - -<1> Documents without a value in the `tags` field will fall into the same bucket as documents that have the value `N/A`. - -==== Mixing field types - -WARNING: When aggregating on multiple indices the type of the aggregated field may not be the same in all indices. 
-Some types are compatible with each other (`integer` and `long` or `float` and `double`) but when the types are a mix -of decimal and non-decimal number the terms aggregation will promote the non-decimal numbers to decimal numbers. -This can result in a loss of precision in the bucket values. - -[discrete] -[[search-aggregations-bucket-terms-aggregation-troubleshooting]] -==== Troubleshooting - -===== Failed Trying to Format Bytes -When running a terms aggregation (or other aggregation, but in practice usually -terms) over multiple indices, you may get an error that starts with "Failed -trying to format bytes...". This is usually caused by two of the indices not -having the same mapping type for the field being aggregated. - -**Use an explicit `value_type`** -Although it's best to correct the mappings, you can work around this issue if -the field is unmapped in one of the indices. Setting the `value_type` parameter -can resolve the issue by coercing the unmapped field into the correct type. - -[source,console,id=terms-aggregation-value_type-example] ----- -GET /_search -{ - "aggs": { - "ip_addresses": { - "terms": { - "field": "destination_ip", - "missing": "0.0.0.0", - "value_type": "ip" - } - } - } -} ----- diff --git a/docs/reference/aggregations/bucket/time-series-aggregation.asciidoc b/docs/reference/aggregations/bucket/time-series-aggregation.asciidoc deleted file mode 100644 index 86e8355b69882..0000000000000 --- a/docs/reference/aggregations/bucket/time-series-aggregation.asciidoc +++ /dev/null @@ -1,115 +0,0 @@ -[[search-aggregations-bucket-time-series-aggregation]] -=== Time series aggregation -++++ -Time series -++++ - -preview::[] - -The time series aggregation queries data created using a <>. This is typically data such as metrics -or other data streams with a time component, and requires creating an index using the time series mode. - -[NOTE] -==== -Refer to the <> to learn more about the key differences from regular data streams. -==== - -////////////////////////// - -Creating a time series mapping - -To create an index with the time series mapping, specify "mode" as "time_series" in the index settings, -"routing_path" specifying the a list of time series fields, and a start and end time for the series. Each of the -"routing_path" fields must be keyword fields with "time_series_dimension" set to true. Additionally, add a -date field used as the timestamp. 
- -[source,js] --------------------------------------------------- -PUT /my-time-series-index -{ - "settings": { - "index": { - "number_of_shards": 3, - "number_of_replicas": 2, - "mode": "time_series", - "routing_path": ["key"], - "time_series": { - "start_time": "2022-01-01T00:00:00Z", - "end_time": "2023-01-01T00:00:00Z" - } - } - }, - "mappings": { - "properties": { - "key": { - "type": "keyword", - "time_series_dimension": true - }, - "@timestamp": { - "type": "date" - } - } - } -} -------------------------------------------------- -// NOTCONSOLE - -////////////////////////// - -Data can be added to the time series index like other indices: - -[source,js] --------------------------------------------------- -PUT /my-time-series-index-0/_bulk -{ "index": {} } -{ "key": "a", "val": 1, "@timestamp": "2022-01-01T00:00:10Z" } -{ "index": {}} -{ "key": "a", "val": 2, "@timestamp": "2022-01-02T00:00:00Z" } -{ "index": {} } -{ "key": "b", "val": 2, "@timestamp": "2022-01-01T00:00:10Z" } -{ "index": {}} -{ "key": "b", "val": 3, "@timestamp": "2022-01-02T00:00:00Z" } --------------------------------------------------- -// NOTCONSOLE - -To perform a time series aggregation, specify "time_series" as the aggregation type. When the boolean "keyed" -is true, each bucket is given a unique key. - -[source,js,id=time-series-aggregation-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "ts": { - "time_series": { "keyed": false } - } - } -} --------------------------------------------------- -// NOTCONSOLE - -This will return all results in the time series, however a more typical query will use sub aggregations to reduce the -date returned to something more relevant. - -[[search-aggregations-bucket-time-series-aggregation-size]] -==== Size - -By default, `time series` aggregations return 10000 results. The "size" parameter can be used to limit the results -further. Alternatively, using sub aggregations can limit the amount of values returned as a time series aggregation. - -[[search-aggregations-bucket-time-series-aggregation-keyed]] -==== Keyed - -The `keyed` parameter determines if buckets are returned as a map with unique keys per bucket. By default with `keyed` -set to false, buckets are returned as an array. - -[[times-series-aggregations-limitations]] -==== Limitations - -The `time_series` aggregation has many limitations. Many aggregation performance optimizations are disabled when using -the `time_series` aggregation. For example the filter by filter optimization or collect mode breath first (`terms` and -`multi_terms` aggregation forcefully use the depth first collect mode). - -The following aggregations also fail to work if used in combination with the `time_series` aggregation: -`auto_date_histogram`, `variable_width_histogram`, `rare_terms`, `global`, `composite`, `sampler`, `random_sampler` and -`diversified_sampler`. diff --git a/docs/reference/aggregations/bucket/variablewidthhistogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/variablewidthhistogram-aggregation.asciidoc deleted file mode 100644 index d6d7bbf432546..0000000000000 --- a/docs/reference/aggregations/bucket/variablewidthhistogram-aggregation.asciidoc +++ /dev/null @@ -1,96 +0,0 @@ -[[search-aggregations-bucket-variablewidthhistogram-aggregation]] -=== Variable width histogram aggregation -++++ -Variable width histogram -++++ - -This is a multi-bucket aggregation similar to <>. -However, the width of each bucket is not specified. 
Rather, a target number of buckets is provided and bucket intervals -are dynamically determined based on the document distribution. This is done using a simple one-pass document clustering algorithm -that aims to obtain low distances between bucket centroids. Unlike other multi-bucket aggregations, the intervals will not -necessarily have a uniform width. - -TIP: The number of buckets returned will always be less than or equal to the target number. - -Requesting a target of 2 buckets. - -[source,console] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "prices": { - "variable_width_histogram": { - "field": "price", - "buckets": 2 - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "prices": { - "buckets": [ - { - "min": 10.0, - "key": 30.0, - "max": 50.0, - "doc_count": 2 - }, - { - "min": 150.0, - "key": 185.0, - "max": 200.0, - "doc_count": 5 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -IMPORTANT: This aggregation cannot currently be nested under any aggregation that collects from more than a single bucket. - -==== Clustering Algorithm -Each shard fetches the first `initial_buffer` documents and stores them in memory. Once the buffer is full, these documents -are sorted and linearly separated into `3/4 * shard_size buckets`. -Next each remaining documents is either collected into the nearest bucket, or placed into a new bucket if it is distant -from all the existing ones. At most `shard_size` total buckets are created. - -In the reduce step, the coordinating node sorts the buckets from all shards by their centroids. Then, the two buckets -with the nearest centroids are repeatedly merged until the target number of buckets is achieved. -This merging procedure is a form of {wikipedia}/Hierarchical_clustering[agglomerative hierarchical clustering]. - -TIP: A shard can return fewer than `shard_size` buckets, but it cannot return more. - -==== Shard size -The `shard_size` parameter specifies the number of buckets that the coordinating node will request from each shard. -A higher `shard_size` leads each shard to produce smaller buckets. This reduces the likelihood of buckets overlapping -after the reduction step. Increasing the `shard_size` will improve the accuracy of the histogram, but it will -also make it more expensive to compute the final result because bigger priority queues will have to be managed on a -shard level, and the data transfers between the nodes and the client will be larger. - -TIP: Parameters `buckets`, `shard_size`, and `initial_buffer` are optional. By default, `buckets = 10`, `shard_size = buckets * 50`, and `initial_buffer = min(10 * shard_size, 50000)`. - -==== Initial Buffer -The `initial_buffer` parameter can be used to specify the number of individual documents that will be stored in memory -on a shard before the initial bucketing algorithm is run. Bucket distribution is determined using this sample -of `initial_buffer` documents. So, although a higher `initial_buffer` will use more memory, it will lead to more representative -clusters. - -==== Bucket bounds are approximate -During the reduce step, the master node continuously merges the two buckets with the nearest centroids. 
If two buckets have -overlapping bounds but distant centroids, then it is possible that they will not be merged. Because of this, after -reduction the maximum value in some interval (`max`) might be greater than the minimum value in the subsequent -bucket (`min`). To reduce the impact of this error, when such an overlap occurs the bound between these intervals is adjusted to be `(max + min) / 2`. - -TIP: Bucket bounds are very sensitive to outliers diff --git a/docs/reference/aggregations/metrics.asciidoc b/docs/reference/aggregations/metrics.asciidoc deleted file mode 100644 index 5143afddf8164..0000000000000 --- a/docs/reference/aggregations/metrics.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -[[search-aggregations-metrics]] -== Metrics aggregations - -The aggregations in this family compute metrics based on values extracted in one way or another from the documents that -are being aggregated. The values are typically extracted from the fields of the document (using the field data), but -can also be generated using scripts. - -Numeric metrics aggregations are a special type of metrics aggregation which output numeric values. Some aggregations output -a single numeric metric (e.g. `avg`) and are called `single-value numeric metrics aggregation`, others generate multiple -metrics (e.g. `stats`) and are called `multi-value numeric metrics aggregation`. The distinction between single-value and -multi-value numeric metrics aggregations plays a role when these aggregations serve as direct sub-aggregations of some -bucket aggregations (some bucket aggregations enable you to sort the returned buckets based on the numeric metrics in each bucket). - -include::metrics/avg-aggregation.asciidoc[] - -include::metrics/boxplot-aggregation.asciidoc[] - -include::metrics/cardinality-aggregation.asciidoc[] - -include::metrics/extendedstats-aggregation.asciidoc[] - -include::metrics/geobounds-aggregation.asciidoc[] - -include::metrics/geocentroid-aggregation.asciidoc[] - -include::metrics/geoline-aggregation.asciidoc[] - -include::metrics/cartesian-bounds-aggregation.asciidoc[] - -include::metrics/cartesian-centroid-aggregation.asciidoc[] - -include::metrics/matrix-stats-aggregation.asciidoc[] - -include::metrics/max-aggregation.asciidoc[] - -include::metrics/median-absolute-deviation-aggregation.asciidoc[] - -include::metrics/min-aggregation.asciidoc[] - -include::metrics/percentile-rank-aggregation.asciidoc[] - -include::metrics/percentile-aggregation.asciidoc[] - -include::metrics/rate-aggregation.asciidoc[] - -include::metrics/scripted-metric-aggregation.asciidoc[] - -include::metrics/stats-aggregation.asciidoc[] - -include::metrics/string-stats-aggregation.asciidoc[] - -include::metrics/sum-aggregation.asciidoc[] - -include::metrics/t-test-aggregation.asciidoc[] - -include::metrics/tophits-aggregation.asciidoc[] - -include::metrics/top-metrics-aggregation.asciidoc[] - -include::metrics/valuecount-aggregation.asciidoc[] - -include::metrics/weighted-avg-aggregation.asciidoc[] diff --git a/docs/reference/aggregations/metrics/avg-aggregation.asciidoc b/docs/reference/aggregations/metrics/avg-aggregation.asciidoc deleted file mode 100644 index 4883eec3e617a..0000000000000 --- a/docs/reference/aggregations/metrics/avg-aggregation.asciidoc +++ /dev/null @@ -1,161 +0,0 @@ -[[search-aggregations-metrics-avg-aggregation]] -=== Avg aggregation -++++ -Avg -++++ - -A `single-value` metrics aggregation that computes the average of numeric values that are extracted from the aggregated documents. 
These values can be extracted either from specific numeric or <> fields in the documents. - -Assuming the data consists of documents representing exams grades (between 0 -and 100) of students we can average their scores with: - -[source,console] --------------------------------------------------- -POST /exams/_search?size=0 -{ - "aggs": { - "avg_grade": { "avg": { "field": "grade" } } - } -} --------------------------------------------------- -// TEST[setup:exams] - -The above aggregation computes the average grade over all documents. The aggregation type is `avg` and the `field` setting defines the numeric field of the documents the average will be computed on. The above will return the following: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "avg_grade": { - "value": 75.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -The name of the aggregation (`avg_grade` above) also serves as the key by which the aggregation result can be retrieved from the returned response. - -==== Script - -Let's say the exam was exceedingly difficult, and you need to apply a grade correction. Average a <> to get a corrected average: - -[source,console] ----- -POST /exams/_search?size=0 -{ - "runtime_mappings": { - "grade.corrected": { - "type": "double", - "script": { - "source": "emit(Math.min(100, doc['grade'].value * params.correction))", - "params": { - "correction": 1.2 - } - } - } - }, - "aggs": { - "avg_corrected_grade": { - "avg": { - "field": "grade.corrected" - } - } - } -} ----- -// TEST[setup:exams] -// TEST[s/size=0/size=0&filter_path=aggregations/] - -//// -[source,console-result] ----- -{ - "aggregations": { - "avg_corrected_grade": { - "value": 80.0 - } - } -} ----- -//// - - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. -By default they will be ignored but it is also possible to treat them as if they -had a value. - -[source,console] --------------------------------------------------- -POST /exams/_search?size=0 -{ - "aggs": { - "grade_avg": { - "avg": { - "field": "grade", - "missing": 10 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:exams] - -<1> Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`. - - -[[search-aggregations-metrics-avg-aggregation-histogram-fields]] -==== Histogram fields -When average is computed on <>, the result of the aggregation is the weighted average -of all elements in the `values` array taking into consideration the number in the same position in the `counts` array. 
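In other words, each histogram contributes `values[i] * counts[i]` to a running sum and `counts[i]` to a running count, and the reported average is `sum(values[i] * counts[i]) / sum(counts[i])` computed over all histograms in the bucket.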
- -For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks: - -[source,console] --------------------------------------------------- -PUT metrics_index/_doc/1 -{ - "network.name" : "net-1", - "latency_histo" : { - "values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1> - "counts" : [3, 7, 23, 12, 6] <2> - } -} - -PUT metrics_index/_doc/2 -{ - "network.name" : "net-2", - "latency_histo" : { - "values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1> - "counts" : [8, 17, 8, 7, 6] <2> - } -} - -POST /metrics_index/_search?size=0 -{ - "aggs": { - "avg_latency": - { "avg": { "field": "latency_histo" } - } - } -} --------------------------------------------------- - -For each histogram field the `avg` aggregation adds each number in the `values` array <1> multiplied by its associated count -in the `counts` array <2>. Eventually, it will compute the average over those values for all histograms and return the following result: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "avg_latency": { - "value": 0.29690721649 - } - } -} --------------------------------------------------- -// TESTRESPONSE[skip:test not setup] diff --git a/docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc b/docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc deleted file mode 100644 index 82814f21bf100..0000000000000 --- a/docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc +++ /dev/null @@ -1,221 +0,0 @@ -[role="xpack"] -[[search-aggregations-metrics-boxplot-aggregation]] -=== Boxplot aggregation -++++ -Boxplot -++++ - -A `boxplot` metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents. -These values can be generated from specific numeric or <> in the documents. - -The `boxplot` aggregation returns essential information for making a {wikipedia}/Box_plot[box plot]: minimum, maximum, -median, first quartile (25th percentile) and third quartile (75th percentile) values. - -==== Syntax - -A `boxplot` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "boxplot": { - "field": "load_time" - } -} --------------------------------------------------- -// NOTCONSOLE - -Let's look at a boxplot representing load time: - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_boxplot": { - "boxplot": { - "field": "load_time" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] -<1> The field `load_time` must be a numeric field - -The response will look like this: - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "load_time_boxplot": { - "min": 0.0, - "max": 990.0, - "q1": 167.5, - "q2": 445.0, - "q3": 722.5, - "lower": 0.0, - "upper": 990.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -In this case, the lower and upper whisker values are equal to the min and max. In general, these values are the 1.5 * -IQR range, which is to say the nearest values to `q1 - (1.5 * IQR)` and `q3 + (1.5 * IQR)`. Since this is an approximation, the given values -may not actually be observed values from the data, but should be within a reasonable error bound of them. 
While the Boxplot aggregation -doesn't directly return outlier points, you can check if `lower > min` or `upper < max` to see if outliers exist on either side, and then -query for them directly. - -==== Script - -If you need to create a boxplot for values that aren't indexed exactly you -should create a <> and get the boxplot of that. For -example, if your load times are in milliseconds but you want values calculated -in seconds, use a runtime field to convert them: - -[source,console] ----- -GET latency/_search -{ - "size": 0, - "runtime_mappings": { - "load_time.seconds": { - "type": "long", - "script": { - "source": "emit(doc['load_time'].value / params.timeUnit)", - "params": { - "timeUnit": 1000 - } - } - } - }, - "aggs": { - "load_time_boxplot": { - "boxplot": { "field": "load_time.seconds" } - } - } -} ----- -// TEST[setup:latency] -// TEST[s/_search/_search?filter_path=aggregations/] -// TEST[s/"timeUnit": 1000/"timeUnit": 10/] - -//// -[source,console-result] --------------------------------------------------- -{ - "aggregations": { - "load_time_boxplot": { - "min": 0.0, - "max": 99.0, - "q1": 16.75, - "q2": 44.5, - "q3": 72.25, - "lower": 0.0, - "upper": 99.0 - } - } -} --------------------------------------------------- -//// - -[[search-aggregations-metrics-boxplot-aggregation-approximation]] -==== Boxplot values are (usually) approximate - -The algorithm used by the `boxplot` metric is called TDigest (introduced by -Ted Dunning in -https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf[Computing Accurate Quantiles using T-Digests]). - -[WARNING] -==== -Boxplot as other percentile aggregations are also -{wikipedia}/Nondeterministic_algorithm[non-deterministic]. -This means you can get slightly different results using the same data. -==== - -[[search-aggregations-metrics-boxplot-aggregation-compression]] -==== Compression - -Approximate algorithms must balance memory utilization with estimation accuracy. -This balance can be controlled using a `compression` parameter: - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_boxplot": { - "boxplot": { - "field": "load_time", - "compression": 200 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] - -<1> Compression controls memory usage and approximation error - -include::percentile-aggregation.asciidoc[tags=t-digest] - -==== Execution hint - -The default implementation of TDigest is optimized for performance, scaling to millions or even -billions of sample values while maintaining acceptable accuracy levels (close to 1% relative error -for millions of samples in some cases). There's an option to use an implementation optimized -for accuracy by setting parameter `execution_hint` to value `high_accuracy`: - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_boxplot": { - "boxplot": { - "field": "load_time", - "execution_hint": "high_accuracy" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] - -<1> Optimize TDigest for accuracy, at the expense of performance - -This option can lead to improved accuracy (relative error close to 0.01% for millions of samples in some -cases) but then percentile queries take 2x-10x longer to complete. - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. 
-By default they will be ignored but it is also possible to treat them as if they -had a value. - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "grade_boxplot": { - "boxplot": { - "field": "grade", - "missing": 10 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] - -<1> Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`. diff --git a/docs/reference/aggregations/metrics/cardinality-aggregation.asciidoc b/docs/reference/aggregations/metrics/cardinality-aggregation.asciidoc deleted file mode 100644 index e384fb6d42c4c..0000000000000 --- a/docs/reference/aggregations/metrics/cardinality-aggregation.asciidoc +++ /dev/null @@ -1,266 +0,0 @@ -[[search-aggregations-metrics-cardinality-aggregation]] -=== Cardinality aggregation -++++ -Cardinality -++++ - -A `single-value` metrics aggregation that calculates an approximate count of -distinct values. - -Assume you are indexing store sales and would like to count the unique number of sold products that match a query: - -[source,console] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "type_count": { - "cardinality": { - "field": "type" - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "type_count": { - "value": 3 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -==== Precision control - -This aggregation also supports the `precision_threshold` option: - -[source,console] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "type_count": { - "cardinality": { - "field": "type", - "precision_threshold": 100 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> The `precision_threshold` options allows to trade memory for accuracy, and -defines a unique count below which counts are expected to be close to -accurate. Above this value, counts might become a bit more fuzzy. The maximum -supported value is 40000, thresholds above this number will have the same -effect as a threshold of 40000. The default value is +3000+. - -==== Counts are approximate - -Computing exact counts requires loading values into a hash set and returning its -size. This doesn't scale when working on high-cardinality sets and/or large -values as the required memory usage and the need to communicate those -per-shard sets between nodes would utilize too many resources of the cluster. - -This `cardinality` aggregation is based on the -https://static.googleusercontent.com/media/research.google.com/fr//pubs/archive/40671.pdf[HyperLogLog++] -algorithm, which counts based on the hashes of the values with some interesting -properties: - -// tag::explanation[] - - * configurable precision, which decides on how to trade memory for accuracy, - * excellent accuracy on low-cardinality sets, - * fixed memory usage: no matter if there are tens or billions of unique values, - memory usage only depends on the configured precision. - -For a precision threshold of `c`, the implementation that we are using requires -about `c * 8` bytes. 
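For instance, the default `precision_threshold` of 3000 works out to roughly 24 kB, and the maximum of 40000 to roughly 320 kB.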
- -The following chart shows how the error varies before and after the threshold: - -//// -To generate this chart use this gnuplot script: -[source,gnuplot] -------- -#!/usr/bin/gnuplot -reset -set terminal png size 1000,400 - -set xlabel "Actual cardinality" -set logscale x - -set ylabel "Relative error (%)" -set yrange [0:8] - -set title "Cardinality error" -set grid - -set style data lines - -plot "test.dat" using 1:2 title "threshold=100", \ -"" using 1:3 title "threshold=1000", \ -"" using 1:4 title "threshold=10000" -# -------- - -and generate data in a 'test.dat' file using the below Java code: - -[source,java] -------- -private static double error(HyperLogLogPlusPlus h, long expected) { - double actual = h.cardinality(0); - return Math.abs(expected - actual) / expected; -} - -public static void main(String[] args) { - HyperLogLogPlusPlus h100 = new HyperLogLogPlusPlus(precisionFromThreshold(100), BigArrays.NON_RECYCLING_INSTANCE, 1); - HyperLogLogPlusPlus h1000 = new HyperLogLogPlusPlus(precisionFromThreshold(1000), BigArrays.NON_RECYCLING_INSTANCE, 1); - HyperLogLogPlusPlus h10000 = new HyperLogLogPlusPlus(precisionFromThreshold(10000), BigArrays.NON_RECYCLING_INSTANCE, 1); - - int next = 100; - int step = 10; - - for (int i = 1; i <= 10000000; ++i) { - long h = BitMixer.mix64(i); - h100.collect(0, h); - h1000.collect(0, h); - h10000.collect(0, h); - - if (i == next) { - System.out.println(i + " " + error(h100, i)*100 + " " + error(h1000, i)*100 + " " + error(h10000, i)*100); - next += step; - if (next >= 100 * step) { - step *= 10; - } - } - } -} -------- - -//// - -image:images/cardinality_error.png[] - -For all 3 thresholds, counts have been accurate up to the configured threshold. -Although not guaranteed, this is likely to be the case. Accuracy in practice depends -on the dataset in question. In general, most datasets show consistently good -accuracy. Also note that even with a threshold as low as 100, the error -remains very low (1-6% as seen in the above graph) even when counting millions of items. - -The HyperLogLog++ algorithm depends on the leading zeros of hashed -values, the exact distributions of hashes in a dataset can affect the -accuracy of the cardinality. - -// end::explanation[] - -==== Pre-computed hashes - -On string fields that have a high cardinality, it might be faster to store the -hash of your field values in your index and then run the cardinality aggregation -on this field. This can either be done by providing hash values from client-side -or by letting Elasticsearch compute hash values for you by using the -{plugins}/mapper-murmur3.html[`mapper-murmur3`] plugin. - -NOTE: Pre-computing hashes is usually only useful on very large and/or -high-cardinality fields as it saves CPU and memory. However, on numeric -fields, hashing is very fast and storing the original values requires as much -or less memory than storing the hashes. This is also true on low-cardinality -string fields, especially given that those have an optimization in order to -make sure that hashes are computed at most once per unique value per segment. - -==== Script - -If you need the cardinality of the combination of two fields, -create a <> combining them and aggregate it. 
- -[source,console] ----- -POST /sales/_search?size=0 -{ - "runtime_mappings": { - "type_and_promoted": { - "type": "keyword", - "script": "emit(doc['type'].value + ' ' + doc['promoted'].value)" - } - }, - "aggs": { - "type_promoted_count": { - "cardinality": { - "field": "type_and_promoted" - } - } - } -} ----- -// TEST[setup:sales] -// TEST[s/size=0/size=0&filter_path=aggregations/] - -//// -[source,console-result] --------------------------------------------------- -{ - "aggregations": { - "type_promoted_count": { - "value": 5 - } - } -} --------------------------------------------------- -//// - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. -By default they will be ignored but it is also possible to treat them as if they -had a value. - -[source,console] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "tag_cardinality": { - "cardinality": { - "field": "tag", - "missing": "N/A" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] -<1> Documents without a value in the `tag` field will fall into the same bucket as documents that have the value `N/A`. - -==== Execution hint - -You can run cardinality aggregations using different mechanisms: - - - by using field values directly (`direct`) - - by using global ordinals of the field and resolving those values after - finishing a shard (`global_ordinals`) - - by using segment ordinal values and resolving those values after each - segment (`segment_ordinals`) - -Additionally, there are two "heuristic based" modes. These modes will cause -{es} to use some data about the state of the index to choose an -appropriate execution method. The two heuristics are: - - - `save_time_heuristic` - this is the default in {es} 8.4 and later. - - `save_memory_heuristic` - this was the default in {es} 8.3 and - earlier - -When not specified, {es} will apply a heuristic to choose the -appropriate mode. Also note that for some data (non-ordinal fields), `direct` -is the only option, and the hint will be ignored in these cases. Generally -speaking, it should not be necessary to set this value. diff --git a/docs/reference/aggregations/metrics/cartesian-bounds-aggregation.asciidoc b/docs/reference/aggregations/metrics/cartesian-bounds-aggregation.asciidoc deleted file mode 100644 index 0a560c9be562d..0000000000000 --- a/docs/reference/aggregations/metrics/cartesian-bounds-aggregation.asciidoc +++ /dev/null @@ -1,148 +0,0 @@ -[[search-aggregations-metrics-cartesian-bounds-aggregation]] -=== Cartesian-bounds aggregation -++++ -Cartesian-bounds -++++ - -A metric aggregation that computes the spatial bounding box containing all values for a <> or <> field. 
- -Example: - -[source,console] --------------------------------------------------- -PUT /museums -{ - "mappings": { - "properties": { - "location": { - "type": "point" - } - } - } -} - -POST /museums/_bulk?refresh -{"index":{"_id":1}} -{"location": "POINT (491.2350 5237.4081)", "city": "Amsterdam", "name": "NEMO Science Museum"} -{"index":{"_id":2}} -{"location": "POINT (490.1618 5236.9219)", "city": "Amsterdam", "name": "Museum Het Rembrandthuis"} -{"index":{"_id":3}} -{"location": "POINT (491.4722 5237.1667)", "city": "Amsterdam", "name": "Nederlands Scheepvaartmuseum"} -{"index":{"_id":4}} -{"location": "POINT (440.5200 5122.2900)", "city": "Antwerp", "name": "Letterenhuis"} -{"index":{"_id":5}} -{"location": "POINT (233.6389 4886.1111)", "city": "Paris", "name": "Musée du Louvre"} -{"index":{"_id":6}} -{"location": "POINT (232.7000 4886.0000)", "city": "Paris", "name": "Musée d'Orsay"} - -POST /museums/_search?size=0 -{ - "query": { - "match": { "name": "musée" } - }, - "aggs": { - "viewport": { - "cartesian_bounds": { - "field": "location" <1> - } - } - } -} --------------------------------------------------- - -<1> The `cartesian_bounds` aggregation specifies the field to use to obtain the bounds, which must be a <> or a <> type. - -[NOTE] -Unlike the case with the <> aggregation, -there is no option to set <>. -This is because the cartesian space is euclidean and does not wrap back on itself. -So the bounds will always have a minimum x value less than or equal to the maximum x value. - -The above aggregation demonstrates how one would compute the bounding box of the location field for all documents with a name matching "musée". - -The response for the above aggregation: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "viewport": { - "bounds": { - "top_left": { - "x": 232.6999969482422, - "y": 4886.111328125 - }, - "bottom_right": { - "x": 233.63890075683594, - "y": 4886.0 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -[discrete] -[role="xpack"] -[[cartesian-bounds-aggregation-shape]] -==== Cartesian Bounds Aggregation on `shape` fields - -The Cartesian Bounds Aggregation is also supported on `cartesian_shape` fields. - -Example: - -[source,console] --------------------------------------------------- -PUT /places -{ - "mappings": { - "properties": { - "geometry": { - "type": "shape" - } - } - } -} - -POST /places/_bulk?refresh -{"index":{"_id":1}} -{"name": "NEMO Science Museum", "geometry": "POINT(491.2350 5237.4081)" } -{"index":{"_id":2}} -{"name": "Sportpark De Weeren", "geometry": { "type": "Polygon", "coordinates": [ [ [ 496.5305328369141, 5239.347642069457 ], [ 496.6979026794433, 5239.1721758934835 ], [ 496.9425201416015, 5239.238958618537 ], [ 496.7944622039794, 5239.420969150824 ], [ 496.5305328369141, 5239.347642069457 ] ] ] } } - -POST /places/_search?size=0 -{ - "aggs": { - "viewport": { - "cartesian_bounds": { - "field": "geometry" - } - } - } -} --------------------------------------------------- -// TEST - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "viewport": { - "bounds": { - "top_left": { - "x": 491.2349853515625, - "y": 5239.4208984375 - }, - "bottom_right": { - "x": 496.9425048828125, - "y": 5237.408203125 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] diff --git a/docs/reference/aggregations/metrics/cartesian-centroid-aggregation.asciidoc b/docs/reference/aggregations/metrics/cartesian-centroid-aggregation.asciidoc deleted file mode 100644 index 662b673fcd9a7..0000000000000 --- a/docs/reference/aggregations/metrics/cartesian-centroid-aggregation.asciidoc +++ /dev/null @@ -1,229 +0,0 @@ -[[search-aggregations-metrics-cartesian-centroid-aggregation]] -=== Cartesian-centroid aggregation - -++++ -Cartesian-centroid -++++ - -A metric aggregation that computes the weighted {wikipedia}/Centroid[centroid] from all coordinate values for point and shape fields. - -Example: - -[source,console] --------------------------------------------------- -PUT /museums -{ - "mappings": { - "properties": { - "location": { - "type": "point" - } - } - } -} - -POST /museums/_bulk?refresh -{"index":{"_id":1}} -{"location": "POINT (491.2350 5237.4081)", "city": "Amsterdam", "name": "NEMO Science Museum"} -{"index":{"_id":2}} -{"location": "POINT (490.1618 5236.9219)", "city": "Amsterdam", "name": "Museum Het Rembrandthuis"} -{"index":{"_id":3}} -{"location": "POINT (491.4722 5237.1667)", "city": "Amsterdam", "name": "Nederlands Scheepvaartmuseum"} -{"index":{"_id":4}} -{"location": "POINT (440.5200 5122.2900)", "city": "Antwerp", "name": "Letterenhuis"} -{"index":{"_id":5}} -{"location": "POINT (233.6389 4886.1111)", "city": "Paris", "name": "Musée du Louvre"} -{"index":{"_id":6}} -{"location": "POINT (232.7000 4886.0000)", "city": "Paris", "name": "Musée d'Orsay"} - -POST /museums/_search?size=0 -{ - "aggs": { - "centroid": { - "cartesian_centroid": { - "field": "location" <1> - } - } - } -} --------------------------------------------------- - -<1> The `cartesian_centroid` aggregation specifies the field to use for computing the centroid, which must be a <> or a <> type. - -The above aggregation demonstrates how one would compute the centroid of the location field for all museums' documents. - -The response for the above aggregation: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "centroid": { - "location": { - "x": 396.6213124593099, - "y": 5100.982991536458 - }, - "count": 6 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -The `cartesian_centroid` aggregation is more interesting when combined as a sub-aggregation to other bucket aggregations. - -Example: - -[source,console] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggs": { - "cities": { - "terms": { "field": "city.keyword" }, - "aggs": { - "centroid": { - "cartesian_centroid": { "field": "location" } - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -The above example uses `cartesian_centroid` as a sub-aggregation to a -<> bucket aggregation for finding the central location for museums in each city. - -The response for the above aggregation: - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "cities": { - "sum_other_doc_count": 0, - "doc_count_error_upper_bound": 0, - "buckets": [ - { - "key": "Amsterdam", - "doc_count": 3, - "centroid": { - "location": { - "x": 490.9563293457031, - "y": 5237.16552734375 - }, - "count": 3 - } - }, - { - "key": "Paris", - "doc_count": 2, - "centroid": { - "location": { - "x": 233.16944885253906, - "y": 4886.0556640625 - }, - "count": 2 - } - }, - { - "key": "Antwerp", - "doc_count": 1, - "centroid": { - "location": { - "x": 440.5199890136719, - "y": 5122.2900390625 - }, - "count": 1 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - - -[discrete] -[role="xpack"] -[[cartesian-centroid-aggregation-geo-shape]] -==== Cartesian Centroid Aggregation on `shape` fields - -The centroid metric for shapes is more nuanced than for points. -The centroid of a specific aggregation bucket containing shapes is the centroid of the highest-dimensionality shape type in the bucket. -For example, if a bucket contains shapes consisting of polygons and lines, then the lines do not contribute to the centroid metric. -Each type of shape's centroid is calculated differently. -Envelopes and circles ingested via the <> are treated as polygons. - -|=== -|Geometry Type | Centroid Calculation - -|[Multi]Point -|equally weighted average of all the coordinates - -|[Multi]LineString -|a weighted average of all the centroids of each segment, where the weight of each segment is its length in the same units as the coordinates - -|[Multi]Polygon -|a weighted average of all the centroids of all the triangles of a polygon where the triangles are formed by every two consecutive vertices and the starting-point. -holes have negative weights. weights represent the area of the triangle is calculated in the square of the units of the coordinates - -|GeometryCollection -|The centroid of all the underlying geometries with the highest dimension. If Polygons and Lines and/or Points, then lines and/or points are ignored. -If Lines and Points, then points are ignored -|=== - -Example: - -[source,console] --------------------------------------------------- -PUT /places -{ - "mappings": { - "properties": { - "geometry": { - "type": "shape" - } - } - } -} - -POST /places/_bulk?refresh -{"index":{"_id":1}} -{"name": "NEMO Science Museum", "geometry": "POINT(491.2350 5237.4081)" } -{"index":{"_id":2}} -{"name": "Sportpark De Weeren", "geometry": { "type": "Polygon", "coordinates": [ [ [ 496.5305328369141, 5239.347642069457 ], [ 496.6979026794433, 5239.1721758934835 ], [ 496.9425201416015, 5239.238958618537 ], [ 496.7944622039794, 5239.420969150824 ], [ 496.5305328369141, 5239.347642069457 ] ] ] } } - -POST /places/_search?size=0 -{ - "aggs": { - "centroid": { - "cartesian_centroid": { - "field": "geometry" - } - } - } -} --------------------------------------------------- -// TEST - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "centroid": { - "location": { - "x": 496.74041748046875, - "y": 5239.29638671875 - }, - "count": 2 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] diff --git a/docs/reference/aggregations/metrics/extendedstats-aggregation.asciidoc b/docs/reference/aggregations/metrics/extendedstats-aggregation.asciidoc deleted file mode 100644 index 4bd614b47b4d7..0000000000000 --- a/docs/reference/aggregations/metrics/extendedstats-aggregation.asciidoc +++ /dev/null @@ -1,188 +0,0 @@ -[[search-aggregations-metrics-extendedstats-aggregation]] -=== Extended stats aggregation -++++ -Extended stats -++++ - -A `multi-value` metrics aggregation that computes stats over numeric values extracted from the aggregated documents. - -The `extended_stats` aggregations is an extended version of the <> aggregation, where additional metrics are added such as `sum_of_squares`, `variance`, `std_deviation` and `std_deviation_bounds`. - -Assuming the data consists of documents representing exams grades (between 0 and 100) of students - -[source,console] --------------------------------------------------- -GET /exams/_search -{ - "size": 0, - "aggs": { - "grades_stats": { "extended_stats": { "field": "grade" } } - } -} --------------------------------------------------- -// TEST[setup:exams] - -The above aggregation computes the grades statistics over all documents. The aggregation type is `extended_stats` and the `field` setting defines the numeric field of the documents the stats will be computed on. The above will return the following: - -The `std_deviation` and `variance` are calculated as population metrics so they are always the same as `std_deviation_population` and `variance_population` respectively. - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "grades_stats": { - "count": 2, - "min": 50.0, - "max": 100.0, - "avg": 75.0, - "sum": 150.0, - "sum_of_squares": 12500.0, - "variance": 625.0, - "variance_population": 625.0, - "variance_sampling": 1250.0, - "std_deviation": 25.0, - "std_deviation_population": 25.0, - "std_deviation_sampling": 35.35533905932738, - "std_deviation_bounds": { - "upper": 125.0, - "lower": 25.0, - "upper_population": 125.0, - "lower_population": 25.0, - "upper_sampling": 145.71067811865476, - "lower_sampling": 4.289321881345245 - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -The name of the aggregation (`grades_stats` above) also serves as the key by which the aggregation result can be retrieved from the returned response. - -==== Standard Deviation Bounds -By default, the `extended_stats` metric will return an object called `std_deviation_bounds`, which provides an interval of plus/minus two standard -deviations from the mean. This can be a useful way to visualize variance of your data. 
If you want a different boundary, for example -three standard deviations, you can set `sigma` in the request: - -[source,console] --------------------------------------------------- -GET /exams/_search -{ - "size": 0, - "aggs": { - "grades_stats": { - "extended_stats": { - "field": "grade", - "sigma": 3 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:exams] -<1> `sigma` controls how many standard deviations +/- from the mean should be displayed - -`sigma` can be any non-negative double, meaning you can request non-integer values such as `1.5`. A value of `0` is valid, but will simply -return the average for both `upper` and `lower` bounds. - -The `upper` and `lower` bounds are calculated as population metrics so they are always the same as `upper_population` and -`lower_population` respectively. - -.Standard Deviation and Bounds require normality -[NOTE] -===== -The standard deviation and its bounds are displayed by default, but they are not always applicable to all data-sets. Your data must -be normally distributed for the metrics to make sense. The statistics behind standard deviations assumes normally distributed data, so -if your data is skewed heavily left or right, the value returned will be misleading. -===== - -==== Script - -If you need to aggregate on a value that isn't indexed, use a <>. -Say the we found out that the grades we've been working on were for an exam that was above -the level of the students and we want to "correct" it: - -[source,console] ----- -GET /exams/_search -{ - "size": 0, - "runtime_mappings": { - "grade.corrected": { - "type": "double", - "script": { - "source": "emit(Math.min(100, doc['grade'].value * params.correction))", - "params": { - "correction": 1.2 - } - } - } - }, - "aggs": { - "grades_stats": { - "extended_stats": { "field": "grade.corrected" } - } - } -} ----- -// TEST[setup:exams] -// TEST[s/_search/_search?filter_path=aggregations/] - -//// -[source,console-result] ----- -{ - "aggregations": { - "grades_stats": { - "count": 2, - "min": 60.0, - "max": 100.0, - "avg": 80.0, - "sum": 160.0, - "sum_of_squares": 13600.0, - "variance": 400.0, - "variance_population": 400.0, - "variance_sampling": 800.0, - "std_deviation": 20.0, - "std_deviation_population": 20.0, - "std_deviation_sampling": 28.284271247461902, - "std_deviation_bounds": { - "upper": 120.0, - "lower": 40.0, - "upper_population": 120.0, - "lower_population": 40.0, - "upper_sampling": 136.5685424949238, - "lower_sampling": 23.431457505076196 - } - } - } -} ----- -//// - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. -By default they will be ignored but it is also possible to treat them as if they -had a value. - -[source,console] --------------------------------------------------- -GET /exams/_search -{ - "size": 0, - "aggs": { - "grades_stats": { - "extended_stats": { - "field": "grade", - "missing": 0 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:exams] - -<1> Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `0`. 
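-
-As a quick sanity check, the `std_deviation_bounds` in the first response above can be reproduced by hand from the
-reported `avg`, `std_deviation` and `sigma` values (an illustrative sketch of the relationship, not an API feature):
-
-[source,js]
----
-upper = avg + sigma * std_deviation   // 75.0 + 2 * 25.0 = 125.0
-lower = avg - sigma * std_deviation   // 75.0 - 2 * 25.0 = 25.0
----
-// NOTCONSOLE
-
-The `upper_sampling` and `lower_sampling` bounds follow the same formula with `std_deviation_sampling` in place of
-`std_deviation`.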
diff --git a/docs/reference/aggregations/metrics/geobounds-aggregation.asciidoc b/docs/reference/aggregations/metrics/geobounds-aggregation.asciidoc deleted file mode 100644 index 35ac798f364b0..0000000000000 --- a/docs/reference/aggregations/metrics/geobounds-aggregation.asciidoc +++ /dev/null @@ -1,155 +0,0 @@ -[[search-aggregations-metrics-geobounds-aggregation]] -=== Geo-bounds aggregation -++++ -Geo-bounds -++++ - -A metric aggregation that computes the geographic bounding box containing all values for a <> or <> field. - -Example: - -[source,console] --------------------------------------------------- -PUT /museums -{ - "mappings": { - "properties": { - "location": { - "type": "geo_point" - } - } - } -} - -POST /museums/_bulk?refresh -{"index":{"_id":1}} -{"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} -{"index":{"_id":2}} -{"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} -{"index":{"_id":3}} -{"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} -{"index":{"_id":4}} -{"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} -{"index":{"_id":5}} -{"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} -{"index":{"_id":6}} -{"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} - -POST /museums/_search?size=0 -{ - "query": { - "match": { "name": "musée" } - }, - "aggs": { - "viewport": { - "geo_bounds": { - "field": "location", <1> - "wrap_longitude": true <2> - } - } - } -} --------------------------------------------------- - -<1> The `geo_bounds` aggregation specifies the field to use to obtain the bounds, which must be a <> or a <> type. -<2> [[geo-bounds-wrap-longitude]] `wrap_longitude` is an optional parameter which specifies whether the bounding box should be allowed to overlap the international date line. The default value is `true`. - -The above aggregation demonstrates how one would compute the bounding box of the location field for all documents with a name matching "musée". - -The response for the above aggregation: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "viewport": { - "bounds": { - "top_left": { - "lat": 48.86111099738628, - "lon": 2.3269999679178 - }, - "bottom_right": { - "lat": 48.85999997612089, - "lon": 2.3363889567553997 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -[discrete] -[role="xpack"] -[[geobounds-aggregation-geo-shape]] -==== Geo Bounds Aggregation on `geo_shape` fields - -The Geo Bounds Aggregation is also supported on `geo_shape` fields. - -If <> is set to `true` -(the default), the bounding box can overlap the international date line and -return a bounds where the `top_left` longitude is larger than the `top_right` -longitude. - -For example, the upper right longitude will typically be greater than the lower -left longitude of a geographic bounding box. However, when the area -crosses the 180° meridian, the value of the lower left longitude will be -greater than the value of the upper right longitude. See -http://docs.opengeospatial.org/is/12-063r5/12-063r5.html#30[Geographic bounding box] on the Open Geospatial Consortium website for more information. 
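-
-For illustration only, a bounds object that crosses the 180° meridian might look like the following sketch
-(hypothetical coordinates, not produced by the examples on this page):
-
-[source,js]
----
-"viewport": {
-  "bounds": {
-    "top_left":     { "lat": 61.0, "lon": 178.2 },
-    "bottom_right": { "lat": 52.0, "lon": -179.1 }
-  }
-}
----
-// NOTCONSOLE
-
-Here the `top_left` longitude (`178.2`) is numerically larger than the `bottom_right` longitude (`-179.1`), which is
-the signal that the box wraps across the date line.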
- -Example: - -[source,console] --------------------------------------------------- -PUT /places -{ - "mappings": { - "properties": { - "geometry": { - "type": "geo_shape" - } - } - } -} - -POST /places/_bulk?refresh -{"index":{"_id":1}} -{"name": "NEMO Science Museum", "geometry": "POINT(4.912350 52.374081)" } -{"index":{"_id":2}} -{"name": "Sportpark De Weeren", "geometry": { "type": "Polygon", "coordinates": [ [ [ 4.965305328369141, 52.39347642069457 ], [ 4.966979026794433, 52.391721758934835 ], [ 4.969425201416015, 52.39238958618537 ], [ 4.967944622039794, 52.39420969150824 ], [ 4.965305328369141, 52.39347642069457 ] ] ] } } - -POST /places/_search?size=0 -{ - "aggs": { - "viewport": { - "geo_bounds": { - "field": "geometry" - } - } - } -} --------------------------------------------------- -// TEST - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "viewport": { - "bounds": { - "top_left": { - "lat": 52.39420966710895, - "lon": 4.912349972873926 - }, - "bottom_right": { - "lat": 52.374080987647176, - "lon": 4.969425117596984 - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] diff --git a/docs/reference/aggregations/metrics/geocentroid-aggregation.asciidoc b/docs/reference/aggregations/metrics/geocentroid-aggregation.asciidoc deleted file mode 100644 index 55beac2a08dd5..0000000000000 --- a/docs/reference/aggregations/metrics/geocentroid-aggregation.asciidoc +++ /dev/null @@ -1,245 +0,0 @@ -[[search-aggregations-metrics-geocentroid-aggregation]] -=== Geo-centroid aggregation - -++++ -Geo-centroid -++++ - -A metric aggregation that computes the weighted {wikipedia}/Centroid[centroid] from all coordinate values for geo fields. - -Example: - -[source,console] --------------------------------------------------- -PUT /museums -{ - "mappings": { - "properties": { - "location": { - "type": "geo_point" - } - } - } -} - -POST /museums/_bulk?refresh -{"index":{"_id":1}} -{"location": "POINT (4.912350 52.374081)", "city": "Amsterdam", "name": "NEMO Science Museum"} -{"index":{"_id":2}} -{"location": "POINT (4.901618 52.369219)", "city": "Amsterdam", "name": "Museum Het Rembrandthuis"} -{"index":{"_id":3}} -{"location": "POINT (4.914722 52.371667)", "city": "Amsterdam", "name": "Nederlands Scheepvaartmuseum"} -{"index":{"_id":4}} -{"location": "POINT (4.405200 51.222900)", "city": "Antwerp", "name": "Letterenhuis"} -{"index":{"_id":5}} -{"location": "POINT (2.336389 48.861111)", "city": "Paris", "name": "Musée du Louvre"} -{"index":{"_id":6}} -{"location": "POINT (2.327000 48.860000)", "city": "Paris", "name": "Musée d'Orsay"} - -POST /museums/_search?size=0 -{ - "aggs": { - "centroid": { - "geo_centroid": { - "field": "location" <1> - } - } - } -} --------------------------------------------------- - -<1> The `geo_centroid` aggregation specifies the field to use for computing the centroid. (NOTE: field must be a <> type) - -The above aggregation demonstrates how one would compute the centroid of the location field for all museums' documents. - -The response for the above aggregation: - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "centroid": { - "location": { - "lat": 51.00982965203002, - "lon": 3.9662131341174245 - }, - "count": 6 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - -The `geo_centroid` aggregation is more interesting when combined as a sub-aggregation to other bucket aggregations. - -Example: - -[source,console] --------------------------------------------------- -POST /museums/_search?size=0 -{ - "aggs": { - "cities": { - "terms": { "field": "city.keyword" }, - "aggs": { - "centroid": { - "geo_centroid": { "field": "location" } - } - } - } - } -} --------------------------------------------------- -// TEST[continued] - -The above example uses `geo_centroid` as a sub-aggregation to a -<> bucket aggregation -for finding the central location for museums in each city. - -The response for the above aggregation: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "cities": { - "sum_other_doc_count": 0, - "doc_count_error_upper_bound": 0, - "buckets": [ - { - "key": "Amsterdam", - "doc_count": 3, - "centroid": { - "location": { - "lat": 52.371655656024814, - "lon": 4.909563297405839 - }, - "count": 3 - } - }, - { - "key": "Paris", - "doc_count": 2, - "centroid": { - "location": { - "lat": 48.86055548675358, - "lon": 2.3316944623366 - }, - "count": 2 - } - }, - { - "key": "Antwerp", - "doc_count": 1, - "centroid": { - "location": { - "lat": 51.22289997059852, - "lon": 4.40519998781383 - }, - "count": 1 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - - -[discrete] -[role="xpack"] -[[geocentroid-aggregation-geo-shape]] -==== Geo Centroid Aggregation on `geo_shape` fields - -The centroid metric for geoshapes is more nuanced than for points. The centroid of a specific aggregation bucket -containing shapes is the centroid of the highest-dimensionality shape type in the bucket. For example, if a bucket contains -shapes comprising of polygons and lines, then the lines do not contribute to the centroid metric. Each type of shape's -centroid is calculated differently. Envelopes and circles ingested via the <> are treated -as polygons. - -|=== -|Geometry Type | Centroid Calculation - -|[Multi]Point -|equally weighted average of all the coordinates - -|[Multi]LineString -|a weighted average of all the centroids of each segment, where the weight of each segment is its length in degrees - -|[Multi]Polygon -|a weighted average of all the centroids of all the triangles of a polygon where the triangles are formed by every two consecutive vertices and the starting-point. - holes have negative weights. weights represent the area of the triangle in deg^2 calculated - -|GeometryCollection -|The centroid of all the underlying geometries with the highest dimension. If Polygons and Lines and/or Points, then lines and/or points are ignored. 
- If Lines and Points, then points are ignored -|=== - -Example: - -[source,console] --------------------------------------------------- -PUT /places -{ - "mappings": { - "properties": { - "geometry": { - "type": "geo_shape" - } - } - } -} - -POST /places/_bulk?refresh -{"index":{"_id":1}} -{"name": "NEMO Science Museum", "geometry": "POINT(4.912350 52.374081)" } -{"index":{"_id":2}} -{"name": "Sportpark De Weeren", "geometry": { "type": "Polygon", "coordinates": [ [ [ 4.965305328369141, 52.39347642069457 ], [ 4.966979026794433, 52.391721758934835 ], [ 4.969425201416015, 52.39238958618537 ], [ 4.967944622039794, 52.39420969150824 ], [ 4.965305328369141, 52.39347642069457 ] ] ] } } - -POST /places/_search?size=0 -{ - "aggs": { - "centroid": { - "geo_centroid": { - "field": "geometry" - } - } - } -} --------------------------------------------------- -// TEST - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "centroid": { - "location": { - "lat": 52.39296147599816, - "lon": 4.967404240742326 - }, - "count": 2 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/] - - -[WARNING] -.Using `geo_centroid` as a sub-aggregation of `geohash_grid` -==== -The <> -aggregation places documents, not individual geopoints, into buckets. If a -document's `geo_point` field contains <>, the document -could be assigned to multiple buckets, even if one or more of its geopoints are -outside the bucket boundaries. - -If a `geocentroid` sub-aggregation is also used, each centroid is calculated -using all geopoints in a bucket, including those outside the bucket boundaries. -This can result in centroids outside of bucket boundaries. -==== diff --git a/docs/reference/aggregations/metrics/geoline-aggregation.asciidoc b/docs/reference/aggregations/metrics/geoline-aggregation.asciidoc deleted file mode 100644 index aabe8d172e4a0..0000000000000 --- a/docs/reference/aggregations/metrics/geoline-aggregation.asciidoc +++ /dev/null @@ -1,485 +0,0 @@ -[role="xpack"] -[[search-aggregations-metrics-geo-line]] -=== Geo-line aggregation -++++ -Geo-line -++++ - -The `geo_line` aggregation aggregates all `geo_point` values within a bucket into a `LineString` ordered -by the chosen `sort` field. This `sort` can be a date field, for example. The bucket returned is a valid -https://tools.ietf.org/html/rfc7946#section-3.2[GeoJSON Feature] representing the line geometry. 
- -[source,console,id=search-aggregations-metrics-geo-line-simple] ----- -PUT test -{ - "mappings": { - "properties": { - "my_location": { "type": "geo_point" }, - "group": { "type": "keyword" }, - "@timestamp": { "type": "date" } - } - } -} - -POST /test/_bulk?refresh -{"index":{}} -{"my_location": {"lat":52.373184, "lon":4.889187}, "@timestamp": "2023-01-02T09:00:00Z"} -{"index":{}} -{"my_location": {"lat":52.370159, "lon":4.885057}, "@timestamp": "2023-01-02T10:00:00Z"} -{"index":{}} -{"my_location": {"lat":52.369219, "lon":4.901618}, "@timestamp": "2023-01-02T13:00:00Z"} -{"index":{}} -{"my_location": {"lat":52.374081, "lon":4.912350}, "@timestamp": "2023-01-02T16:00:00Z"} -{"index":{}} -{"my_location": {"lat":52.371667, "lon":4.914722}, "@timestamp": "2023-01-03T12:00:00Z"} - -POST /test/_search?filter_path=aggregations -{ - "aggs": { - "line": { - "geo_line": { - "point": {"field": "my_location"}, - "sort": {"field": "@timestamp"} - } - } - } -} ----- - -Which returns: - -[source,js] ----- -{ - "aggregations": { - "line": { - "type": "Feature", - "geometry": { - "type": "LineString", - "coordinates": [ - [ 4.889187, 52.373184 ], - [ 4.885057, 52.370159 ], - [ 4.901618, 52.369219 ], - [ 4.912350, 52.374081 ], - [ 4.914722, 52.371667 ] - ] - }, - "properties": { - "complete": true - } - } - } -} ----- -// TESTRESPONSE - -The resulting https://tools.ietf.org/html/rfc7946#section-3.2[GeoJSON Feature] contains both a `LineString` geometry -for the path generated by the aggregation, as well as a map of `properties`. -The property `complete` informs of whether all documents matched were used to generate the geometry. -The <> can be used to limit the number of documents included in the aggregation, -leading to results with `complete: false`. -Exactly which documents are dropped from results <>. - -This result could be displayed in a map user interface: - -image:images/spatial/geo_line.png[Kibana map with museum tour of Amsterdam] - -[[search-aggregations-metrics-geo-line-options]] -==== Options - -`point`:: -(Required) - -This option specifies the name of the `geo_point` field - -Example usage configuring `my_location` as the point field: - -[source,js] ----- -"point": { - "field": "my_location" -} ----- -// NOTCONSOLE - -`sort`:: -(Required outside <> aggregations) - -This option specifies the name of the numeric field to use as the sort key for ordering the points. -When the `geo_line` aggregation is nested inside a -<> -aggregation, this field defaults to `@timestamp`, and any other value will result in error. - -Example usage configuring `@timestamp` as the sort key: - -[source,js] ----- -"sort": { - "field": "@timestamp" -} ----- -// NOTCONSOLE - -`include_sort`:: -(Optional, boolean, default: `false`) This option includes, when true, an additional array of the sort values in the -feature properties. - -`sort_order`:: -(Optional, string, default: `"ASC"`) This option accepts one of two values: "ASC", "DESC". -The line is sorted in ascending order by the sort key when set to "ASC", and in descending -with "DESC". - -[[search-aggregations-metrics-geo-line-size]] -`size`:: -(Optional, integer, default: `10000`) The maximum length of the line represented in the aggregation. -Valid sizes are between one and 10000. -Within <> -the aggregation uses line simplification to constrain the size, otherwise it uses truncation. -Refer to <> -for a discussion on the subtleties involved. 
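-
-Taken together, a fully spelled-out (non-time-series) `geo_line` configuration using all of the options above might
-look like this sketch:
-
-[source,js]
----
-"geo_line": {
-  "point":        { "field": "my_location" },
-  "sort":         { "field": "@timestamp" },
-  "include_sort": true,
-  "sort_order":   "ASC",
-  "size":         10000
-}
----
-// NOTCONSOLE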
- -[[search-aggregations-metrics-geo-line-grouping]] -==== Grouping - -This simple example produces a single track for all the data selected by the query. However, it is far more -common to need to group the data into multiple tracks. For example, grouping flight transponder measurements by -flight call-sign before sorting each flight by timestamp and producing a separate track for each. - -In the following examples we will group the locations of points of interest in the cities of -Amsterdam, Antwerp and Paris. -The tracks will be ordered by the planned visit sequence for a walking tour of the museums and others attractions. - -In order to demonstrate the difference between a time-series grouping and a non-time-series grouping, we will -first create an index with <>, -and then give examples of grouping the same data without time-series and with time-series. - -[source,console,id=search-aggregations-metrics-geo-line-grouping-setup] ----- -PUT tour -{ - "mappings": { - "properties": { - "city": { - "type": "keyword", - "time_series_dimension": true - }, - "category": { "type": "keyword" }, - "route": { "type": "long" }, - "name": { "type": "keyword" }, - "location": { "type": "geo_point" }, - "@timestamp": { "type": "date" } - } - }, - "settings": { - "index": { - "mode": "time_series", - "routing_path": [ "city" ], - "time_series": { - "start_time": "2023-01-01T00:00:00Z", - "end_time": "2024-01-01T00:00:00Z" - } - } - } -} - -POST /tour/_bulk?refresh -{"index":{}} -{"@timestamp": "2023-01-02T09:00:00Z", "route": 0, "location": "POINT(4.889187 52.373184)", "city": "Amsterdam", "category": "Attraction", "name": "Royal Palace Amsterdam"} -{"index":{}} -{"@timestamp": "2023-01-02T10:00:00Z", "route": 1, "location": "POINT(4.885057 52.370159)", "city": "Amsterdam", "category": "Attraction", "name": "The Amsterdam Dungeon"} -{"index":{}} -{"@timestamp": "2023-01-02T13:00:00Z", "route": 2, "location": "POINT(4.901618 52.369219)", "city": "Amsterdam", "category": "Museum", "name": "Museum Het Rembrandthuis"} -{"index":{}} -{"@timestamp": "2023-01-02T16:00:00Z", "route": 3, "location": "POINT(4.912350 52.374081)", "city": "Amsterdam", "category": "Museum", "name": "NEMO Science Museum"} -{"index":{}} -{"@timestamp": "2023-01-03T12:00:00Z", "route": 4, "location": "POINT(4.914722 52.371667)", "city": "Amsterdam", "category": "Museum", "name": "Nederlands Scheepvaartmuseum"} -{"index":{}} -{"@timestamp": "2023-01-04T09:00:00Z", "route": 5, "location": "POINT(4.401384 51.220292)", "city": "Antwerp", "category": "Attraction", "name": "Cathedral of Our Lady"} -{"index":{}} -{"@timestamp": "2023-01-04T12:00:00Z", "route": 6, "location": "POINT(4.405819 51.221758)", "city": "Antwerp", "category": "Museum", "name": "Snijders&Rockoxhuis"} -{"index":{}} -{"@timestamp": "2023-01-04T15:00:00Z", "route": 7, "location": "POINT(4.405200 51.222900)", "city": "Antwerp", "category": "Museum", "name": "Letterenhuis"} -{"index":{}} -{"@timestamp": "2023-01-05T10:00:00Z", "route": 8, "location": "POINT(2.336389 48.861111)", "city": "Paris", "category": "Museum", "name": "Musée du Louvre"} -{"index":{}} -{"@timestamp": "2023-01-05T14:00:00Z", "route": 9, "location": "POINT(2.327000 48.860000)", "city": "Paris", "category": "Museum", "name": "Musée dOrsay"} ----- - -[[search-aggregations-metrics-geo-line-grouping-terms]] -==== Grouping with terms - -Using this data, for a non-time-series use case, the grouping can be done using a -<> based on city name. 
-This would work whether or not we had defined the `tour` index as a time series index. - -[source,console,id=search-aggregations-metrics-geo-line-terms] ----- -POST /tour/_search?filter_path=aggregations -{ - "aggregations": { - "path": { - "terms": {"field": "city"}, - "aggregations": { - "museum_tour": { - "geo_line": { - "point": {"field": "location"}, - "sort": {"field": "@timestamp"} - } - } - } - } - } -} ----- -// TEST[continued] - -Which returns: - -[source,js] ----- -{ - "aggregations": { - "path": { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "Amsterdam", - "doc_count": 5, - "museum_tour": { - "type": "Feature", - "geometry": { - "coordinates": [ [ 4.889187, 52.373184 ], [ 4.885057, 52.370159 ], [ 4.901618, 52.369219 ], [ 4.91235, 52.374081 ], [ 4.914722, 52.371667 ] ], - "type": "LineString" - }, - "properties": { - "complete": true - } - } - }, - { - "key": "Antwerp", - "doc_count": 3, - "museum_tour": { - "type": "Feature", - "geometry": { - "coordinates": [ [ 4.401384, 51.220292 ], [ 4.405819, 51.221758 ], [ 4.4052, 51.2229 ] ], - "type": "LineString" - }, - "properties": { - "complete": true - } - } - }, - { - "key": "Paris", - "doc_count": 2, - "museum_tour": { - "type": "Feature", - "geometry": { - "coordinates": [ [ 2.336389, 48.861111 ], [ 2.327, 48.86 ] ], - "type": "LineString" - }, - "properties": { - "complete": true - } - } - } - ] - } - } -} ----- -// TESTRESPONSE - -These results contain an array of buckets, where each bucket is a JSON object with the `key` showing the name -of the `city` field, and an inner aggregation result called `museum_tour` containing a -https://tools.ietf.org/html/rfc7946#section-3.2[GeoJSON Feature] describing the -actual route between the various attractions in that city. -Each result also includes a `properties` object with a `complete` value which will be `false` if the geometry -was truncated to the limits specified in the `size` parameter. -Note that when we use `time_series` in the next example, we will get the same results structured a little differently. - -[[search-aggregations-metrics-geo-line-grouping-time-series]] -==== Grouping with time-series - -preview::[] - -Using the same data as before, we can also perform the grouping with a -<>. -This will group by TSID, which is defined as the combinations of all fields with `time_series_dimension: true`, -in this case the same `city` field used in the previous -<>. -This example will only work if we defined the `tour` index as a time series index using `index.mode="time_series"`. - -[source,console,id=search-aggregations-metrics-geo-line-time-series] ----- -POST /tour/_search?filter_path=aggregations -{ - "aggregations": { - "path": { - "time_series": {}, - "aggregations": { - "museum_tour": { - "geo_line": { - "point": {"field": "location"} - } - } - } - } - } -} ----- -// TEST[continued] - -NOTE: The `geo_line` aggregation no longer requires the `sort` field when nested within a -<>. -This is because the sort field is set to `@timestamp`, which all time-series indexes are pre-sorted by. -If you do set this parameter, and set it to something other than `@timestamp` you will get an error. 
- -This query will result in: - -[source,js] ----- -{ - "aggregations": { - "path": { - "buckets": { - "{city=Paris}": { - "key": { - "city": "Paris" - }, - "doc_count": 2, - "museum_tour": { - "type": "Feature", - "geometry": { - "coordinates": [ [ 2.336389, 48.861111 ], [ 2.327, 48.86 ] ], - "type": "LineString" - }, - "properties": { - "complete": true - } - } - }, - "{city=Antwerp}": { - "key": { - "city": "Antwerp" - }, - "doc_count": 3, - "museum_tour": { - "type": "Feature", - "geometry": { - "coordinates": [ [ 4.401384, 51.220292 ], [ 4.405819, 51.221758 ], [ 4.4052, 51.2229 ] ], - "type": "LineString" - }, - "properties": { - "complete": true - } - } - }, - "{city=Amsterdam}": { - "key": { - "city": "Amsterdam" - }, - "doc_count": 5, - "museum_tour": { - "type": "Feature", - "geometry": { - "coordinates": [ [ 4.889187, 52.373184 ], [ 4.885057, 52.370159 ], [ 4.901618, 52.369219 ], [ 4.91235, 52.374081 ], [ 4.914722, 52.371667 ] ], - "type": "LineString" - }, - "properties": { - "complete": true - } - } - } - } - } - } -} ----- -// TESTRESPONSE - -These results are essentially the same as with the previous `terms` aggregation example, but structured differently. -Here we see the buckets returned as a map, where the key is an internal description of the TSID. -This TSID is unique for each unique combination of fields with `time_series_dimension: true`. -Each bucket contains a `key` field which is also a map of all dimension values for the TSID, in this case only the city -name is used for grouping. -In addition, there is an inner aggregation result called `museum_tour` containing a -https://tools.ietf.org/html/rfc7946#section-3.2[GeoJSON Feature] describing the -actual route between the various attractions in that city. -Each result also includes a `properties` object with a `complete` value which will be false if the geometry -was simplified to the limits specified in the `size` parameter. - -[[search-aggregations-metrics-geo-line-grouping-time-series-advantages]] -==== Why group with time-series? - -When reviewing these examples, you might think that there is little difference between using -<> or -<> -to group the geo-lines. However, there are some important differences in behaviour between the two cases. -Time series indexes are stored in a very specific order on disk. -They are pre-grouped by the time-series dimension fields, and pre-sorted by the `@timestamp` field. -This allows the `geo_line` aggregation to be considerably optimized: - -* The same memory allocated for the first bucket can be re-used over and over for all subsequent buckets. - This is substantially less memory than required for non-time-series cases where all buckets are collected - concurrently. -* No sorting needs to be done, since the data is pre-sorted by `@timestamp`. - The time-series data will naturally arrive at the aggregation collector in `DESC` order. - This means that if we specify `sort_order:ASC` (the default), we still collect in `DESC` order, - but perform an efficient in-memory reverse order before generating the final `LineString` geometry. -* The `size` parameter can be used for a streaming line-simplification algorithm. - Without time-series, we are forced to truncate data, by default after 10000 documents per bucket, in order to - prevent memory usage from being unbounded. - This can result in geo-lines being truncated, and therefor loosing important data. 
- With time-series we can run a streaming line-simplification algorithm, retaining control over memory usage,
- while also maintaining the overall geometry shape.
- In fact, for most use cases it would work to set this `size` parameter to a much lower value, and save even more
- memory. For example, if the `geo_line` is to be drawn on a display map with a specific resolution, it might look
- just as good to simplify to as few as 100 or 200 points. This will save memory on the server, on the network and
- in the client.
-
-Note: There are other significant advantages to working with time-series data and using `time_series` index mode.
-These are discussed in the documentation on <>.
-
-[[search-aggregations-metrics-geo-line-simplification]]
-==== Streaming line simplification
-
-Line simplification is a great way to reduce the size of the final results sent to the client and displayed in a map
-user interface. However, these algorithms normally use a lot of memory to perform the simplification, requiring the
-entire geometry to be maintained in memory together with supporting data for the simplification itself.
-The use of a streaming line simplification algorithm allows for minimal memory usage during the simplification
-process by constraining memory to the bounds defined for the simplified geometry. This is only possible if no sorting
-is required, which is the case when grouping is done by the
-<>,
-running on an index with the `time_series` index mode.
-
-Under these conditions the `geo_line` aggregation allocates memory up to the specified `size`, and then fills that
-memory with the incoming documents.
-Once the memory is completely filled, documents from within the line are removed as new documents are added.
-The choice of which document to remove is made to minimize the visual impact on the geometry.
-This process makes use of the
-https://en.wikipedia.org/wiki/Visvalingam%E2%80%93Whyatt_algorithm[Visvalingam–Whyatt algorithm].
-Essentially this means points are removed if they have the minimum triangle area, with the triangle defined
-by the point under consideration and the two points before and after it in the line.
-In addition, the area is calculated using spherical coordinates so that no planar distortions affect the choice.
-
-To demonstrate how much better line simplification is than line truncation, consider this example of the north
-shore of Kodiak Island.
-The data for this is only 209 points, but if we set `size` to `100` we get dramatic truncation.
-
-image:images/spatial/kodiak_geo_line_truncated.png[North shore of Kodiak Island truncated to 100 points]
-
-The grey line is the entire geometry of 209 points, while the blue line is the first 100 points, a very different
-geometry from the original.
-
-Now consider the same geometry simplified to 100 points.
-
-image:images/spatial/kodiak_geo_line_simplified.png[North shore of Kodiak Island simplified to 100 points]
-
-For comparison we have shown the original in grey, the truncated line in blue and the new simplified geometry
-in magenta. It is possible to see where the simplified line deviates from the original, but the overall
-geometry appears almost identical and is still clearly recognizable as the north shore of Kodiak Island.
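-
-Continuing the museum-tour example above, lowering the `size` parameter is all that is needed to cap each simplified
-track under `time_series` grouping; an illustrative sketch against the `tour` index (the tour data is far smaller
-than this cap, so nothing would actually be simplified here):
-
-[source,js]
----
-POST /tour/_search?filter_path=aggregations
-{
-  "aggregations": {
-    "path": {
-      "time_series": {},
-      "aggregations": {
-        "museum_tour": {
-          "geo_line": {
-            "point": { "field": "location" },
-            "size": 100
-          }
-        }
-      }
-    }
-  }
-}
----
-// NOTCONSOLE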
diff --git a/docs/reference/aggregations/metrics/matrix-stats-aggregation.asciidoc b/docs/reference/aggregations/metrics/matrix-stats-aggregation.asciidoc deleted file mode 100644 index 730d554ec4e57..0000000000000 --- a/docs/reference/aggregations/metrics/matrix-stats-aggregation.asciidoc +++ /dev/null @@ -1,142 +0,0 @@ -[[search-aggregations-matrix-stats-aggregation]] -=== Matrix stats aggregation -++++ -Matrix stats -++++ - -The `matrix_stats` aggregation is a numeric aggregation that computes the following statistics over a set of document fields: - -[horizontal] -`count`:: Number of per field samples included in the calculation. -`mean`:: The average value for each field. -`variance`:: Per field Measurement for how spread out the samples are from the mean. -`skewness`:: Per field measurement quantifying the asymmetric distribution around the mean. -`kurtosis`:: Per field measurement quantifying the shape of the distribution. -`covariance`:: A matrix that quantitatively describes how changes in one field are associated with another. -`correlation`:: The covariance matrix scaled to a range of -1 to 1, inclusive. Describes the relationship between field - distributions. - -IMPORTANT: Unlike other metric aggregations, the `matrix_stats` aggregation does -not support scripting. - -////////////////////////// - -[source,js] --------------------------------------------------- -PUT /statistics/_doc/0 -{"poverty": 24.0, "income": 50000.0} - -PUT /statistics/_doc/1 -{"poverty": 13.0, "income": 95687.0} - -PUT /statistics/_doc/2 -{"poverty": 69.0, "income": 7890.0} - -POST /_refresh --------------------------------------------------- -// NOTCONSOLE -// TESTSETUP - -////////////////////////// - -The following example demonstrates the use of matrix stats to describe the relationship between income and poverty. - -[source,console,id=stats-aggregation-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "statistics": { - "matrix_stats": { - "fields": [ "poverty", "income" ] - } - } - } -} --------------------------------------------------- -// TEST[s/_search/_search\?filter_path=aggregations/] - -The aggregation type is `matrix_stats` and the `fields` setting defines the set of fields (as an array) for computing -the statistics. The above request returns the following response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "statistics": { - "doc_count": 50, - "fields": [ { - "name": "income", - "count": 50, - "mean": 51985.1, - "variance": 7.383377037755103E7, - "skewness": 0.5595114003506483, - "kurtosis": 2.5692365287787124, - "covariance": { - "income": 7.383377037755103E7, - "poverty": -21093.65836734694 - }, - "correlation": { - "income": 1.0, - "poverty": -0.8352655256272504 - } - }, { - "name": "poverty", - "count": 50, - "mean": 12.732000000000001, - "variance": 8.637730612244896, - "skewness": 0.4516049811903419, - "kurtosis": 2.8615929677997767, - "covariance": { - "income": -21093.65836734694, - "poverty": 8.637730612244896 - }, - "correlation": { - "income": -0.8352655256272504, - "poverty": 1.0 - } - } ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] -// TESTRESPONSE[s/: (\-)?[0-9\.E]+/: $body.$_path/] - -The `doc_count` field indicates the number of documents involved in the computation of the statistics. - -==== Multi Value Fields - -The `matrix_stats` aggregation treats each document field as an independent sample. 
The `mode` parameter controls what -array value the aggregation will use for array or multi-valued fields. This parameter can take one of the following: - -[horizontal] -`avg`:: (default) Use the average of all values. -`min`:: Pick the lowest value. -`max`:: Pick the highest value. -`sum`:: Use the sum of all values. -`median`:: Use the median of all values. - -==== Missing Values - -The `missing` parameter defines how documents that are missing a value should be treated. -By default they will be ignored but it is also possible to treat them as if they had a value. -This is done by adding a set of fieldname : value mappings to specify default values per field. - -[source,console,id=stats-aggregation-missing-example] --------------------------------------------------- -GET /_search -{ - "aggs": { - "matrixstats": { - "matrix_stats": { - "fields": [ "poverty", "income" ], - "missing": { "income": 50000 } <1> - } - } - } -} --------------------------------------------------- - -<1> Documents without a value in the `income` field will have the default value `50000`. diff --git a/docs/reference/aggregations/metrics/max-aggregation.asciidoc b/docs/reference/aggregations/metrics/max-aggregation.asciidoc deleted file mode 100644 index 13855a2285249..0000000000000 --- a/docs/reference/aggregations/metrics/max-aggregation.asciidoc +++ /dev/null @@ -1,173 +0,0 @@ -[[search-aggregations-metrics-max-aggregation]] -=== Max aggregation -++++ -Max -++++ - -A `single-value` metrics aggregation that keeps track and returns the maximum -value among the numeric values extracted from the aggregated documents. - -NOTE: The `min` and `max` aggregation operate on the `double` representation of -the data. As a consequence, the result may be approximate when running on longs -whose absolute value is greater than +2^53+. - -Computing the max price value across all documents - -[source,console] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "max_price": { "max": { "field": "price" } } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "max_price": { - "value": 200.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -As can be seen, the name of the aggregation (`max_price` above) also serves as -the key by which the aggregation result can be retrieved from the returned -response. - -==== Script - -If you need to get the `max` of something more complex than a single field, -run an aggregation on a <>. - -[source,console] ----- -POST /sales/_search -{ - "size": 0, - "runtime_mappings": { - "price.adjusted": { - "type": "double", - "script": """ - double price = doc['price'].value; - if (doc['promoted'].value) { - price *= 0.8; - } - emit(price); - """ - } - }, - "aggs": { - "max_price": { - "max": { "field": "price.adjusted" } - } - } -} ----- -// TEST[setup:sales] -// TEST[s/_search/_search?filter_path=aggregations/] - -//// -[source,console-result] --------------------------------------------------- -{ - "aggregations": { - "max_price": { - "value": 175.0 - } - } -} --------------------------------------------------- -//// - - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should -be treated. 
By default they will be ignored but it is also possible to treat -them as if they had a value. - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "aggs" : { - "grade_max" : { - "max" : { - "field" : "grade", - "missing": 10 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> Documents without a value in the `grade` field will fall into the same -bucket as documents that have the value `10`. - -[[search-aggregations-metrics-max-aggregation-histogram-fields]] -==== Histogram fields - -When `max` is computed on <>, the result of the aggregation is the maximum -of all elements in the `values` array. Note, that the `counts` array of the histogram is ignored. - -For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks: - -[source,console] ----- -PUT metrics_index -{ - "mappings": { - "properties": { - "latency_histo": { "type": "histogram" } - } - } -} - -PUT metrics_index/_doc/1?refresh -{ - "network.name" : "net-1", - "latency_histo" : { - "values" : [0.1, 0.2, 0.3, 0.4, 0.5], - "counts" : [3, 7, 23, 12, 6] - } -} - -PUT metrics_index/_doc/2?refresh -{ - "network.name" : "net-2", - "latency_histo" : { - "values" : [0.1, 0.2, 0.3, 0.4, 0.5], - "counts" : [8, 17, 8, 7, 6] - } -} - -POST /metrics_index/_search?size=0&filter_path=aggregations -{ - "aggs" : { - "max_latency" : { "max" : { "field" : "latency_histo" } } - } -} ----- - -The `max` aggregation will return the maximum value of all histogram fields: - -[source,console-result] ----- -{ - "aggregations": { - "max_latency": { - "value": 0.5 - } - } -} ----- diff --git a/docs/reference/aggregations/metrics/median-absolute-deviation-aggregation.asciidoc b/docs/reference/aggregations/metrics/median-absolute-deviation-aggregation.asciidoc deleted file mode 100644 index 6c72edfe0af78..0000000000000 --- a/docs/reference/aggregations/metrics/median-absolute-deviation-aggregation.asciidoc +++ /dev/null @@ -1,187 +0,0 @@ -[[search-aggregations-metrics-median-absolute-deviation-aggregation]] -=== Median absolute deviation aggregation -++++ -Median absolute deviation -++++ - -This `single-value` aggregation approximates the {wikipedia}/Median_absolute_deviation[median absolute deviation] -of its search results. - -Median absolute deviation is a measure of variability. It is a robust -statistic, meaning that it is useful for describing data that may have -outliers, or may not be normally distributed. For such data it can be more -descriptive than standard deviation. - -It is calculated as the median of each data point's deviation from the median -of the entire sample. That is, for a random variable X, the median absolute -deviation is median(|median(X) - X~i~|). - -==== Example - -Assume our data represents product reviews on a one to five star scale. -Such reviews are usually summarized as a mean, which is easily understandable -but doesn't describe the reviews' variability. Estimating the median absolute -deviation can provide insight into how much reviews vary from one another. - -In this example we have a product which has an average rating of -3 stars. 
Let's look at its ratings' median absolute deviation to determine -how much they vary - -[source,console] ---------------------------------------------------------- -GET reviews/_search -{ - "size": 0, - "aggs": { - "review_average": { - "avg": { - "field": "rating" - } - }, - "review_variability": { - "median_absolute_deviation": { - "field": "rating" <1> - } - } - } -} ---------------------------------------------------------- -// TEST[setup:reviews] -<1> `rating` must be a numeric field - -The resulting median absolute deviation of `2` tells us that there is a fair -amount of variability in the ratings. Reviewers must have diverse opinions about -this product. - -[source,console-result] ---------------------------------------------------------- -{ - ... - "aggregations": { - "review_average": { - "value": 3.0 - }, - "review_variability": { - "value": 2.0 - } - } -} ---------------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -==== Approximation - -The naive implementation of calculating median absolute deviation stores the -entire sample in memory, so this aggregation instead calculates an -approximation. It uses the https://github.com/tdunning/t-digest[TDigest data structure] -to approximate the sample median and the median of deviations from the sample -median. For more about the approximation characteristics of TDigests, see -<>. - -The tradeoff between resource usage and accuracy of a TDigest's quantile -approximation, and therefore the accuracy of this aggregation's approximation -of median absolute deviation, is controlled by the `compression` parameter. A -higher `compression` setting provides a more accurate approximation at the -cost of higher memory usage. For more about the characteristics of the TDigest -`compression` parameter see -<>. - -[source,console] ---------------------------------------------------------- -GET reviews/_search -{ - "size": 0, - "aggs": { - "review_variability": { - "median_absolute_deviation": { - "field": "rating", - "compression": 100 - } - } - } -} ---------------------------------------------------------- -// TEST[setup:reviews] - -The default `compression` value for this aggregation is `1000`. At this -compression level this aggregation is usually within 5% of the exact result, -but observed performance will depend on the sample data. - -==== Script - -In the example above, product reviews are on a scale of one to five. If you -want to modify them to a scale of one to ten, use a <>. - -[source,console] ----- -GET reviews/_search?filter_path=aggregations -{ - "size": 0, - "runtime_mappings": { - "rating.out_of_ten": { - "type": "long", - "script": { - "source": "emit(doc['rating'].value * params.scaleFactor)", - "params": { - "scaleFactor": 2 - } - } - } - }, - "aggs": { - "review_average": { - "avg": { - "field": "rating.out_of_ten" - } - }, - "review_variability": { - "median_absolute_deviation": { - "field": "rating.out_of_ten" - } - } - } -} ----- -// TEST[setup:reviews] - -Which will result in: - -[source,console-result] ---------------------------------------------------------- -{ - "aggregations": { - "review_average": { - "value": 6.0 - }, - "review_variability": { - "value": 4.0 - } - } -} ---------------------------------------------------------- - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be -treated. 
By default they will be ignored but it is also possible to treat them -as if they had a value. - -Let's be optimistic and assume some reviewers loved the product so much that -they forgot to give it a rating. We'll assign them five stars - -[source,console] ---------------------------------------------------------- -GET reviews/_search -{ - "size": 0, - "aggs": { - "review_variability": { - "median_absolute_deviation": { - "field": "rating", - "missing": 5 - } - } - } -} ---------------------------------------------------------- -// TEST[setup:reviews] diff --git a/docs/reference/aggregations/metrics/min-aggregation.asciidoc b/docs/reference/aggregations/metrics/min-aggregation.asciidoc deleted file mode 100644 index d7bf8b478f563..0000000000000 --- a/docs/reference/aggregations/metrics/min-aggregation.asciidoc +++ /dev/null @@ -1,173 +0,0 @@ -[[search-aggregations-metrics-min-aggregation]] -=== Min aggregation -++++ -Min -++++ - -A `single-value` metrics aggregation that keeps track and returns the minimum -value among numeric values extracted from the aggregated documents. - -NOTE: The `min` and `max` aggregation operate on the `double` representation of -the data. As a consequence, the result may be approximate when running on longs -whose absolute value is greater than +2^53+. - -Computing the min price value across all documents: - -[source,console] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "min_price": { "min": { "field": "price" } } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "min_price": { - "value": 10.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -As can be seen, the name of the aggregation (`min_price` above) also serves as -the key by which the aggregation result can be retrieved from the returned -response. - -==== Script - -If you need to get the `min` of something more complex than a single field, -run the aggregation on a <>. - -[source,console] ----- -POST /sales/_search -{ - "size": 0, - "runtime_mappings": { - "price.adjusted": { - "type": "double", - "script": """ - double price = doc['price'].value; - if (doc['promoted'].value) { - price *= 0.8; - } - emit(price); - """ - } - }, - "aggs": { - "min_price": { - "min": { "field": "price.adjusted" } - } - } -} ----- -// TEST[setup:sales] -// TEST[s/_search/_search?filter_path=aggregations/] - -//// -[source,console-result] --------------------------------------------------- -{ - "aggregations": { - "min_price": { - "value": 8.0 - } - } -} --------------------------------------------------- -//// - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should -be treated. By default they will be ignored but it is also possible to treat -them as if they had a value. - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "aggs": { - "grade_min": { - "min": { - "field": "grade", - "missing": 10 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> Documents without a value in the `grade` field will fall into the same -bucket as documents that have the value `10`. 
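-
-Since `min` and `max` accept the same options, both ends of the price range can be requested in one call; a minimal
-sketch against the `sales` data used above:
-
-[source,js]
----
-POST /sales/_search?size=0
-{
-  "aggs": {
-    "min_price": { "min": { "field": "price" } },
-    "max_price": { "max": { "field": "price" } }
-  }
-}
----
-// NOTCONSOLE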
- -[[search-aggregations-metrics-min-aggregation-histogram-fields]] -==== Histogram fields - -When `min` is computed on <>, the result of the aggregation is the minimum -of all elements in the `values` array. Note, that the `counts` array of the histogram is ignored. - -For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks: - -[source,console] ----- -PUT metrics_index -{ - "mappings": { - "properties": { - "latency_histo": { "type": "histogram" } - } - } -} - -PUT metrics_index/_doc/1?refresh -{ - "network.name" : "net-1", - "latency_histo" : { - "values" : [0.1, 0.2, 0.3, 0.4, 0.5], - "counts" : [3, 7, 23, 12, 6] - } -} - -PUT metrics_index/_doc/2?refresh -{ - "network.name" : "net-2", - "latency_histo" : { - "values" : [0.1, 0.2, 0.3, 0.4, 0.5], - "counts" : [8, 17, 8, 7, 6] - } -} - -POST /metrics_index/_search?size=0&filter_path=aggregations -{ - "aggs" : { - "min_latency" : { "min" : { "field" : "latency_histo" } } - } -} ----- - -The `min` aggregation will return the minimum value of all histogram fields: - -[source,console-result] ----- -{ - "aggregations": { - "min_latency": { - "value": 0.1 - } - } -} ----- diff --git a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc deleted file mode 100644 index 23a690b62372d..0000000000000 --- a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc +++ /dev/null @@ -1,418 +0,0 @@ -[[search-aggregations-metrics-percentile-aggregation]] -=== Percentiles aggregation -++++ -Percentiles -++++ - -A `multi-value` metrics aggregation that calculates one or more percentiles -over numeric values extracted from the aggregated documents. These values can be -extracted from specific numeric or <> in the documents. - -Percentiles show the point at which a certain percentage of observed values -occur. For example, the 95th percentile is the value which is greater than 95% -of the observed values. - -Percentiles are often used to find outliers. In normal distributions, the -0.13th and 99.87th percentiles represents three standard deviations from the -mean. Any data which falls outside three standard deviations is often considered -an anomaly. - -When a range of percentiles are retrieved, they can be used to estimate the -data distribution and determine if the data is skewed, bimodal, etc. - -Assume your data consists of website load times. The average and median -load times are not overly useful to an administrator. The max may be interesting, -but it can be easily skewed by a single slow response. - -Let's look at a range of percentiles representing load time: - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_outlier": { - "percentiles": { - "field": "load_time" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] -<1> The field `load_time` must be a numeric field - -By default, the `percentile` metric will generate a range of -percentiles: `[ 1, 5, 25, 50, 75, 95, 99 ]`. The response will look like this: - -[source,console-result] --------------------------------------------------- -{ - ... 
- - "aggregations": { - "load_time_outlier": { - "values": { - "1.0": 10.0, - "5.0": 30.0, - "25.0": 170.0, - "50.0": 445.0, - "75.0": 720.0, - "95.0": 940.0, - "99.0": 980.0 - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -// TESTRESPONSE[s/"1.0": 10.0/"1.0": 9.9/] -// TESTRESPONSE[s/"5.0": 30.0/"5.0": 29.5/] -// TESTRESPONSE[s/"25.0": 170.0/"25.0": 167.5/] -// TESTRESPONSE[s/"50.0": 445.0/"50.0": 445.0/] -// TESTRESPONSE[s/"75.0": 720.0/"75.0": 722.5/] -// TESTRESPONSE[s/"95.0": 940.0/"95.0": 940.5/] -// TESTRESPONSE[s/"99.0": 980.0/"99.0": 980.1/] - -As you can see, the aggregation will return a calculated value for each percentile -in the default range. If we assume response times are in milliseconds, it is -immediately obvious that the webpage normally loads in 10-720ms, but occasionally -spikes to 940-980ms. - -Often, administrators are only interested in outliers -- the extreme percentiles. -We can specify just the percents we are interested in (requested percentiles -must be a value between 0-100 inclusive): - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_outlier": { - "percentiles": { - "field": "load_time", - "percents": [ 95, 99, 99.9 ] <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] -<1> Use the `percents` parameter to specify particular percentiles to calculate - -==== Keyed Response - -By default the `keyed` flag is set to `true` which associates a unique string key with each bucket and returns the ranges as a hash rather than an array. Setting the `keyed` flag to `false` will disable this behavior: - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_outlier": { - "percentiles": { - "field": "load_time", - "keyed": false - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "load_time_outlier": { - "values": [ - { - "key": 1.0, - "value": 10.0 - }, - { - "key": 5.0, - "value": 30.0 - }, - { - "key": 25.0, - "value": 170.0 - }, - { - "key": 50.0, - "value": 445.0 - }, - { - "key": 75.0, - "value": 720.0 - }, - { - "key": 95.0, - "value": 940.0 - }, - { - "key": 99.0, - "value": 980.0 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -// TESTRESPONSE[s/"value": 10.0/"value": 9.9/] -// TESTRESPONSE[s/"value": 30.0/"value": 29.5/] -// TESTRESPONSE[s/"value": 170.0/"value": 167.5/] -// TESTRESPONSE[s/"value": 445.0/"value": 445.0/] -// TESTRESPONSE[s/"value": 720.0/"value": 722.5/] -// TESTRESPONSE[s/"value": 940.0/"value": 940.5/] -// TESTRESPONSE[s/"value": 980.0/"value": 980.1/] - -==== Script - -If you need to run the aggregation against values that aren't indexed, use -a <>. 
For example, if our load times -are in milliseconds but you want percentiles calculated in seconds: - -[source,console] ----- -GET latency/_search -{ - "size": 0, - "runtime_mappings": { - "load_time.seconds": { - "type": "long", - "script": { - "source": "emit(doc['load_time'].value / params.timeUnit)", - "params": { - "timeUnit": 1000 - } - } - } - }, - "aggs": { - "load_time_outlier": { - "percentiles": { - "field": "load_time.seconds" - } - } - } -} ----- -// TEST[setup:latency] -// TEST[s/_search/_search?filter_path=aggregations/] -// TEST[s/"timeUnit": 1000/"timeUnit": 10/] - -//// -[source,console-result] ----- -{ - "aggregations": { - "load_time_outlier": { - "values": { - "1.0": 0.99, - "5.0": 2.95, - "25.0": 16.75, - "50.0": 44.5, - "75.0": 72.25, - "95.0": 94.05, - "99.0": 98.01 - } - } - } -} ----- -//// - -[[search-aggregations-metrics-percentile-aggregation-approximation]] -==== Percentiles are (usually) approximate - -// tag::approximate[] -There are many different algorithms to calculate percentiles. The naive -implementation simply stores all the values in a sorted array. To find the 50th -percentile, you simply find the value that is at `my_array[count(my_array) * 0.5]`. - -Clearly, the naive implementation does not scale -- the sorted array grows -linearly with the number of values in your dataset. To calculate percentiles -across potentially billions of values in an Elasticsearch cluster, _approximate_ -percentiles are calculated. - -The algorithm used by the `percentile` metric is called TDigest (introduced by -Ted Dunning in -https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf[Computing Accurate Quantiles using T-Digests]). - -When using this metric, there are a few guidelines to keep in mind: - -- Accuracy is proportional to `q(1-q)`. This means that extreme percentiles (e.g. 99%) -are more accurate than less extreme percentiles, such as the median -- For small sets of values, percentiles are highly accurate (and potentially -100% accurate if the data is small enough). -- As the quantity of values in a bucket grows, the algorithm begins to approximate -the percentiles. It is effectively trading accuracy for memory savings. The -exact level of inaccuracy is difficult to generalize, since it depends on your -data distribution and volume of data being aggregated - -The following chart shows the relative error on a uniform distribution depending -on the number of collected values and the requested percentile: - -image:images/percentiles_error.png[] - -It shows how precision is better for extreme percentiles. The reason why error diminishes -for large number of values is that the law of large numbers makes the distribution of -values more and more uniform and the t-digest tree can do a better job at summarizing -it. It would not be the case on more skewed distributions. - -// end::approximate[] - -[WARNING] -==== -Percentile aggregations are also -{wikipedia}/Nondeterministic_algorithm[non-deterministic]. -This means you can get slightly different results using the same data. -==== - -[[search-aggregations-metrics-percentile-aggregation-compression]] -==== Compression - -Approximate algorithms must balance memory utilization with estimation accuracy. 
-This balance can be controlled using a `compression` parameter: - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_outlier": { - "percentiles": { - "field": "load_time", - "tdigest": { - "compression": 200 <1> - } - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] - -<1> Compression controls memory usage and approximation error - -// tag::t-digest[] -The TDigest algorithm uses a number of "nodes" to approximate percentiles -- the -more nodes available, the higher the accuracy (and large memory footprint) proportional -to the volume of data. The `compression` parameter limits the maximum number of -nodes to `20 * compression`. - -Therefore, by increasing the compression value, you can increase the accuracy of -your percentiles at the cost of more memory. Larger compression values also -make the algorithm slower since the underlying tree data structure grows in size, -resulting in more expensive operations. The default compression value is -`100`. - -A "node" uses roughly 32 bytes of memory, so under worst-case scenarios (large amount -of data which arrives sorted and in-order) the default settings will produce a -TDigest roughly 64KB in size. In practice data tends to be more random and -the TDigest will use less memory. -// end::t-digest[] - -[[search-aggregations-metrics-percentile-aggregation-execution-hint]] -==== Execution hint - -The default implementation of TDigest is optimized for performance, scaling to millions or even -billions of sample values while maintaining acceptable accuracy levels (close to 1% relative error -for millions of samples in some cases). There's an option to use an implementation optimized -for accuracy by setting parameter `execution_hint` to value `high_accuracy`: - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_outlier": { - "percentiles": { - "field": "load_time", - "tdigest": { - "execution_hint": "high_accuracy" <1> - } - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] - -<1> Optimize TDigest for accuracy, at the expense of performance - -This option can lead to improved accuracy (relative error close to 0.01% for millions of samples in some -cases) but then percentile queries take 2x-10x longer to complete. - -==== HDR histogram - -https://github.com/HdrHistogram/HdrHistogram[HDR Histogram] (High Dynamic Range Histogram) is an alternative implementation -that can be useful when calculating percentiles for latency measurements as it can be faster than the t-digest implementation -with the trade-off of a larger memory footprint. This implementation maintains a fixed worse-case percentage error (specified -as a number of significant digits). This means that if data is recorded with values from 1 microsecond up to 1 hour -(3,600,000,000 microseconds) in a histogram set to 3 significant digits, it will maintain a value resolution of 1 microsecond -for values up to 1 millisecond and 3.6 seconds (or better) for the maximum tracked value (1 hour). 
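As a quick worked check of the figures above (an illustration of my own, not part of the original file; it assumes that `number_of_significant_value_digits: 3` corresponds to a relative resolution of roughly one part in 10^3):

\[
\text{resolution}(v) \approx \frac{v}{10^{3}}, \qquad
\frac{1{,}000\ \mu\text{s}}{10^{3}} = 1\ \mu\text{s}, \qquad
\frac{3{,}600{,}000{,}000\ \mu\text{s}}{10^{3}} = 3{,}600{,}000\ \mu\text{s} = 3.6\ \text{s}
\]

which matches the 1 microsecond and 3.6 second resolutions quoted above for values of 1 millisecond and 1 hour respectively.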
- -The HDR Histogram can be used by specifying the `hdr` parameter in the request: - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_outlier": { - "percentiles": { - "field": "load_time", - "percents": [ 95, 99, 99.9 ], - "hdr": { <1> - "number_of_significant_value_digits": 3 <2> - } - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] - -<1> `hdr` object indicates that HDR Histogram should be used to calculate the percentiles and specific settings for this algorithm can be specified inside the object -<2> `number_of_significant_value_digits` specifies the resolution of values for the histogram in number of significant digits - -The HDRHistogram only supports positive values and will error if it is passed a negative value. It is also not a good idea to use -the HDRHistogram if the range of values is unknown as this could lead to high memory usage. - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. -By default they will be ignored but it is also possible to treat them as if they -had a value. - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "grade_percentiles": { - "percentiles": { - "field": "grade", - "missing": 10 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] - -<1> Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`. diff --git a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc deleted file mode 100644 index 1bc2744600e8b..0000000000000 --- a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc +++ /dev/null @@ -1,233 +0,0 @@ -[[search-aggregations-metrics-percentile-rank-aggregation]] -=== Percentile ranks aggregation -++++ -Percentile ranks -++++ - -A `multi-value` metrics aggregation that calculates one or more percentile ranks -over numeric values extracted from the aggregated documents. These values can be -extracted from specific numeric or <> in the documents. - -[NOTE] -================================================== -Please see <>, -<> and -<> for advice -regarding approximation, performance and memory use of the percentile ranks aggregation -================================================== - -Percentile rank show the percentage of observed values which are below certain -value. For example, if a value is greater than or equal to 95% of the observed values -it is said to be at the 95th percentile rank. - -Assume your data consists of website load times. You may have a service agreement that -95% of page loads complete within 500ms and 99% of page loads complete within 600ms. - -Let's look at a range of percentiles representing load time: - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_ranks": { - "percentile_ranks": { - "field": "load_time", <1> - "values": [ 500, 600 ] - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] - -<1> The field `load_time` must be a numeric field - -The response will look like this: - -[source,console-result] --------------------------------------------------- -{ - ... 
- - "aggregations": { - "load_time_ranks": { - "values": { - "500.0": 55.0, - "600.0": 64.0 - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -// TESTRESPONSE[s/"500.0": 55.0/"500.0": 55.00000000000001/] -// TESTRESPONSE[s/"600.0": 64.0/"600.0": 64.0/] - -From this information you can determine you are hitting the 99% load time target but not quite -hitting the 95% load time target - -==== Keyed Response - -By default the `keyed` flag is set to `true` associates a unique string key with each bucket and returns the ranges as a hash rather than an array. Setting the `keyed` flag to `false` will disable this behavior: - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_ranks": { - "percentile_ranks": { - "field": "load_time", - "values": [ 500, 600 ], - "keyed": false - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "load_time_ranks": { - "values": [ - { - "key": 500.0, - "value": 55.0 - }, - { - "key": 600.0, - "value": 64.0 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -// TESTRESPONSE[s/"value": 55.0/"value": 55.00000000000001/] -// TESTRESPONSE[s/"value": 64.0/"value": 64.0/] - - -==== Script - -If you need to run the aggregation against values that aren't indexed, use -a <>. For example, if our load times -are in milliseconds but we want percentiles calculated in seconds: - -[source,console] ----- -GET latency/_search -{ - "size": 0, - "runtime_mappings": { - "load_time.seconds": { - "type": "long", - "script": { - "source": "emit(doc['load_time'].value / params.timeUnit)", - "params": { - "timeUnit": 1000 - } - } - } - }, - "aggs": { - "load_time_ranks": { - "percentile_ranks": { - "values": [ 500, 600 ], - "field": "load_time.seconds" - } - } - } -} ----- -// TEST[setup:latency] -// TEST[s/_search/_search?filter_path=aggregations/] - -//// -[source,console-result] --------------------------------------------------- -{ - "aggregations": { - "load_time_ranks": { - "values": { - "500.0": 100.0, - "600.0": 100.0 - } - } - } -} --------------------------------------------------- -//// - -==== HDR Histogram - -https://github.com/HdrHistogram/HdrHistogram[HDR Histogram] (High Dynamic Range Histogram) is an alternative implementation -that can be useful when calculating percentile ranks for latency measurements as it can be faster than the t-digest implementation -with the trade-off of a larger memory footprint. This implementation maintains a fixed worse-case percentage error (specified as a -number of significant digits). This means that if data is recorded with values from 1 microsecond up to 1 hour (3,600,000,000 -microseconds) in a histogram set to 3 significant digits, it will maintain a value resolution of 1 microsecond for values up to -1 millisecond and 3.6 seconds (or better) for the maximum tracked value (1 hour). 
- -The HDR Histogram can be used by specifying the `hdr` object in the request: - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_ranks": { - "percentile_ranks": { - "field": "load_time", - "values": [ 500, 600 ], - "hdr": { <1> - "number_of_significant_value_digits": 3 <2> - } - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] - -<1> `hdr` object indicates that HDR Histogram should be used to calculate the percentiles and specific settings for this algorithm can be specified inside the object -<2> `number_of_significant_value_digits` specifies the resolution of values for the histogram in number of significant digits - -The HDRHistogram only supports positive values and will error if it is passed a negative value. It is also not a good idea to use -the HDRHistogram if the range of values is unknown as this could lead to high memory usage. - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. -By default they will be ignored but it is also possible to treat them as if they -had a value. - -[source,console] --------------------------------------------------- -GET latency/_search -{ - "size": 0, - "aggs": { - "load_time_ranks": { - "percentile_ranks": { - "field": "load_time", - "values": [ 500, 600 ], - "missing": 10 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:latency] - -<1> Documents without a value in the `load_time` field will fall into the same bucket as documents that have the value `10`. diff --git a/docs/reference/aggregations/metrics/rate-aggregation.asciidoc b/docs/reference/aggregations/metrics/rate-aggregation.asciidoc deleted file mode 100644 index ab3a3c092dc1f..0000000000000 --- a/docs/reference/aggregations/metrics/rate-aggregation.asciidoc +++ /dev/null @@ -1,481 +0,0 @@ -[role="xpack"] -[[search-aggregations-metrics-rate-aggregation]] -=== Rate aggregation -++++ -Rate -++++ - -A `rate` metrics aggregation can be used only inside a `date_histogram` or `composite` aggregation. It calculates a rate of documents -or a field in each bucket. The field values can be extracted from specific numeric or -<> in the documents. - -NOTE: For `composite` aggregations, there must be exactly one `date_histogram` source for the `rate` aggregation to be supported. - -==== Syntax - -A `rate` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "rate": { - "unit": "month", - "field": "requests" - } -} --------------------------------------------------- -// NOTCONSOLE - -The following request will group all sales records into monthly buckets and then convert the number of sales transactions in each bucket -into per annual sales rate. - -[source,console] --------------------------------------------------- -GET sales/_search -{ - "size": 0, - "aggs": { - "by_date": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" <1> - }, - "aggs": { - "my_rate": { - "rate": { - "unit": "year" <2> - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] -<1> Histogram is grouped by month. -<2> But the rate is converted into annual rate. - -The response will return the annual rate of transactions in each bucket. Since there are 12 months per year, the annual rate will -be automatically calculated by multiplying the monthly rate by 12. 
- -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations" : { - "by_date" : { - "buckets" : [ - { - "key_as_string" : "2015/01/01 00:00:00", - "key" : 1420070400000, - "doc_count" : 3, - "my_rate" : { - "value" : 36.0 - } - }, - { - "key_as_string" : "2015/02/01 00:00:00", - "key" : 1422748800000, - "doc_count" : 2, - "my_rate" : { - "value" : 24.0 - } - }, - { - "key_as_string" : "2015/03/01 00:00:00", - "key" : 1425168000000, - "doc_count" : 2, - "my_rate" : { - "value" : 24.0 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -Instead of counting the number of documents, it is also possible to calculate a sum of all values of the fields in the documents in each -bucket or the number of values in each bucket. The following request will group all sales records into monthly bucket and than calculate -the total monthly sales and convert them into average daily sales. - -[source,console] --------------------------------------------------- -GET sales/_search -{ - "size": 0, - "aggs": { - "by_date": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" <1> - }, - "aggs": { - "avg_price": { - "rate": { - "field": "price", <2> - "unit": "day" <3> - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] -<1> Histogram is grouped by month. -<2> Calculate sum of all sale prices -<3> Convert to average daily sales - -The response will contain the average daily sale prices for each month. - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations" : { - "by_date" : { - "buckets" : [ - { - "key_as_string" : "2015/01/01 00:00:00", - "key" : 1420070400000, - "doc_count" : 3, - "avg_price" : { - "value" : 17.741935483870968 - } - }, - { - "key_as_string" : "2015/02/01 00:00:00", - "key" : 1422748800000, - "doc_count" : 2, - "avg_price" : { - "value" : 2.142857142857143 - } - }, - { - "key_as_string" : "2015/03/01 00:00:00", - "key" : 1425168000000, - "doc_count" : 2, - "avg_price" : { - "value" : 12.096774193548388 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -You can also take advantage of `composite` aggregations to calculate the average daily sale price for each item in -your inventory - -[source,console] --------------------------------------------------- -GET sales/_search?filter_path=aggregations&size=0 -{ - "aggs": { - "buckets": { - "composite": { <1> - "sources": [ - { - "month": { - "date_histogram": { <2> - "field": "date", - "calendar_interval": "month" - } - } - }, - { - "type": { <3> - "terms": { - "field": "type" - } - } - } - ] - }, - "aggs": { - "avg_price": { - "rate": { - "field": "price", <4> - "unit": "day" <5> - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] -<1> Composite aggregation with a date histogram source - and a source for the item type. -<2> The date histogram source grouping monthly -<3> The terms source grouping for each sale item type -<4> Calculate sum of all sale prices, per month and item -<5> Convert to average daily sales per item - -The response will contain the average daily sale prices for each month per item. 
- -[source,console-result] --------------------------------------------------- -{ - "aggregations" : { - "buckets" : { - "after_key" : { - "month" : 1425168000000, - "type" : "t-shirt" - }, - "buckets" : [ - { - "key" : { - "month" : 1420070400000, - "type" : "bag" - }, - "doc_count" : 1, - "avg_price" : { - "value" : 4.838709677419355 - } - }, - { - "key" : { - "month" : 1420070400000, - "type" : "hat" - }, - "doc_count" : 1, - "avg_price" : { - "value" : 6.451612903225806 - } - }, - { - "key" : { - "month" : 1420070400000, - "type" : "t-shirt" - }, - "doc_count" : 1, - "avg_price" : { - "value" : 6.451612903225806 - } - }, - { - "key" : { - "month" : 1422748800000, - "type" : "hat" - }, - "doc_count" : 1, - "avg_price" : { - "value" : 1.7857142857142858 - } - }, - { - "key" : { - "month" : 1422748800000, - "type" : "t-shirt" - }, - "doc_count" : 1, - "avg_price" : { - "value" : 0.35714285714285715 - } - }, - { - "key" : { - "month" : 1425168000000, - "type" : "hat" - }, - "doc_count" : 1, - "avg_price" : { - "value" : 6.451612903225806 - } - }, - { - "key" : { - "month" : 1425168000000, - "type" : "t-shirt" - }, - "doc_count" : 1, - "avg_price" : { - "value" : 5.645161290322581 - } - } - ] - } - } -} --------------------------------------------------- - -By adding the `mode` parameter with the value `value_count`, we can change the calculation from `sum` to the number of values of the field: - -[source,console] --------------------------------------------------- -GET sales/_search -{ - "size": 0, - "aggs": { - "by_date": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" <1> - }, - "aggs": { - "avg_number_of_sales_per_year": { - "rate": { - "field": "price", <2> - "unit": "year", <3> - "mode": "value_count" <4> - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] -<1> Histogram is grouped by month. -<2> Calculate number of all sale prices -<3> Convert to annual counts -<4> Changing the mode to value count - -The response will contain the average daily sale prices for each month. - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations" : { - "by_date" : { - "buckets" : [ - { - "key_as_string" : "2015/01/01 00:00:00", - "key" : 1420070400000, - "doc_count" : 3, - "avg_number_of_sales_per_year" : { - "value" : 36.0 - } - }, - { - "key_as_string" : "2015/02/01 00:00:00", - "key" : 1422748800000, - "doc_count" : 2, - "avg_number_of_sales_per_year" : { - "value" : 24.0 - } - }, - { - "key_as_string" : "2015/03/01 00:00:00", - "key" : 1425168000000, - "doc_count" : 2, - "avg_number_of_sales_per_year" : { - "value" : 24.0 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -By default `sum` mode is used. - -`"mode": "sum"`:: calculate the sum of all values field -`"mode": "value_count"`:: use the number of values in the field - -==== Relationship between bucket sizes and rate - -The `rate` aggregation supports all rate that can be used <> of `date_histogram` -aggregation. The specified rate should compatible with the `date_histogram` aggregation interval, i.e. it should be possible to -convert the bucket size into the rate. By default the interval of the `date_histogram` is used. 
- -`"rate": "second"`:: compatible with all intervals -`"rate": "minute"`:: compatible with all intervals -`"rate": "hour"`:: compatible with all intervals -`"rate": "day"`:: compatible with all intervals -`"rate": "week"`:: compatible with all intervals -`"rate": "month"`:: compatible with only with `month`, `quarter` and `year` calendar intervals -`"rate": "quarter"`:: compatible with only with `month`, `quarter` and `year` calendar intervals -`"rate": "year"`:: compatible with only with `month`, `quarter` and `year` calendar intervals - -There is also an additional limitations if the date histogram is not a direct parent of the rate histogram. In this case both rate interval -and histogram interval have to be in the same group: [`second`, ` minute`, `hour`, `day`, `week`] or [`month`, `quarter`, `year`]. For -example, if the date histogram is `month` based, only rate intervals of `month`, `quarter` or `year` are supported. If the date histogram -is `day` based, only `second`, ` minute`, `hour`, `day`, and `week` rate intervals are supported. - -==== Script - -If you need to run the aggregation against values that aren't indexed, run the -aggregation on a <>. For example, if we need to adjust -our prices before calculating rates: - -[source,console] ----- -GET sales/_search -{ - "size": 0, - "runtime_mappings": { - "price.adjusted": { - "type": "double", - "script": { - "source": "emit(doc['price'].value * params.adjustment)", - "params": { - "adjustment": 0.9 - } - } - } - }, - "aggs": { - "by_date": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "avg_price": { - "rate": { - "field": "price.adjusted" - } - } - } - } - } -} ----- -// TEST[setup:sales] - -[source,console-result] ----- -{ - ... - "aggregations" : { - "by_date" : { - "buckets" : [ - { - "key_as_string" : "2015/01/01 00:00:00", - "key" : 1420070400000, - "doc_count" : 3, - "avg_price" : { - "value" : 495.0 - } - }, - { - "key_as_string" : "2015/02/01 00:00:00", - "key" : 1422748800000, - "doc_count" : 2, - "avg_price" : { - "value" : 54.0 - } - }, - { - "key_as_string" : "2015/03/01 00:00:00", - "key" : 1425168000000, - "doc_count" : 2, - "avg_price" : { - "value" : 337.5 - } - } - ] - } - } -} ----- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] diff --git a/docs/reference/aggregations/metrics/scripted-metric-aggregation.asciidoc b/docs/reference/aggregations/metrics/scripted-metric-aggregation.asciidoc deleted file mode 100644 index 4e20d01f32555..0000000000000 --- a/docs/reference/aggregations/metrics/scripted-metric-aggregation.asciidoc +++ /dev/null @@ -1,289 +0,0 @@ -[[search-aggregations-metrics-scripted-metric-aggregation]] -=== Scripted metric aggregation -++++ -Scripted metric -++++ - -A metric aggregation that executes using scripts to provide a metric output. - -WARNING: `scripted_metric` is not available in {serverless-full}. - -WARNING: Using scripts can result in slower search speeds. See -<>. When using a scripted metric aggregation, its intermediate state is serialized -into an in-memory byte array for transmission to other nodes during the aggregation process. -Consequently, a complex scripted metric aggregation may also encounter the 2GB limitation imposed on Java arrays. 
- -Example: - -[source,console] --------------------------------------------------- -POST ledger/_search?size=0 -{ - "query": { - "match_all": {} - }, - "aggs": { - "profit": { - "scripted_metric": { - "init_script": "state.transactions = []", <1> - "map_script": "state.transactions.add(doc.type.value == 'sale' ? doc.amount.value : -1 * doc.amount.value)", - "combine_script": "double profit = 0; for (t in state.transactions) { profit += t } return profit", - "reduce_script": "double profit = 0; for (a in states) { profit += a } return profit" - } - } - } -} --------------------------------------------------- -// TEST[setup:ledger] - -<1> `init_script` is an optional parameter, all other scripts are required. - -The above aggregation demonstrates how one would use the script aggregation compute the total profit from sale and cost transactions. - -The response for the above aggregation: - -[source,console-result] --------------------------------------------------- -{ - "took": 218, - ... - "aggregations": { - "profit": { - "value": 240.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 218/"took": $body.took/] -// TESTRESPONSE[s/\.\.\./"_shards": $body._shards, "hits": $body.hits, "timed_out": false,/] - -The above example can also be specified using stored scripts as follows: - -[source,console] --------------------------------------------------- -POST ledger/_search?size=0 -{ - "aggs": { - "profit": { - "scripted_metric": { - "init_script": { - "id": "my_init_script" - }, - "map_script": { - "id": "my_map_script" - }, - "combine_script": { - "id": "my_combine_script" - }, - "params": { - "field": "amount" <1> - }, - "reduce_script": { - "id": "my_reduce_script" - } - } - } - } -} --------------------------------------------------- -// TEST[setup:ledger,stored_scripted_metric_script] - -<1> script parameters for `init`, `map` and `combine` scripts must be specified -in a global `params` object so that it can be shared between the scripts. - -//// -Verify this response as well but in a hidden block. - -[source,console-result] --------------------------------------------------- -{ - "took": 218, - ... - "aggregations": { - "profit": { - "value": 240.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 218/"took": $body.took/] -// TESTRESPONSE[s/\.\.\./"_shards": $body._shards, "hits": $body.hits, "timed_out": false,/] -//// - -For more details on specifying scripts see <>. - -[[scripted-metric-aggregation-return-types]] -==== Allowed return types - -Whilst any valid script object can be used within a single script, the scripts must return or store in the `state` object only the following types: - -* primitive types -* String -* Map (containing only keys and values of the types listed here) -* Array (containing elements of only the types listed here) - -[[scripted-metric-aggregation-scope]] -==== Scope of scripts - -The scripted metric aggregation uses scripts at 4 stages of its execution: - -init_script:: Executed prior to any collection of documents. Allows the aggregation to set up any initial state. -+ -In the above example, the `init_script` creates an array `transactions` in the `state` object. - -map_script:: Executed once per document collected. This is a required script. -+ -In the above example, the `map_script` checks the value of the type field. If the value is 'sale' the value of the amount field -is added to the transactions array. 
If the value of the type field is not 'sale' the negated value of the amount field is added -to transactions. - -combine_script:: Executed once on each shard after document collection is complete. This is a required script. Allows the aggregation to - consolidate the state returned from each shard. -+ -In the above example, the `combine_script` iterates through all the stored transactions, summing the values in the `profit` variable -and finally returns `profit`. - -reduce_script:: Executed once on the coordinating node after all shards have returned their results. This is a required script. The - script is provided with access to a variable `states` which is an array of the result of the combine_script on each - shard. -+ -In the above example, the `reduce_script` iterates through the `profit` returned by each shard summing the values before returning the -final combined profit which will be returned in the response of the aggregation. - -[[scripted-metric-aggregation-example]] -==== Worked example - -Imagine a situation where you index the following documents into an index with 2 shards: - -[source,console] --------------------------------------------------- -PUT /transactions/_bulk?refresh -{"index":{"_id":1}} -{"type": "sale","amount": 80} -{"index":{"_id":2}} -{"type": "cost","amount": 10} -{"index":{"_id":3}} -{"type": "cost","amount": 30} -{"index":{"_id":4}} -{"type": "sale","amount": 130} --------------------------------------------------- - -Lets say that documents 1 and 3 end up on shard A and documents 2 and 4 end up on shard B. The following is a breakdown of what the aggregation result is -at each stage of the example above. - -===== Before init_script - -`state` is initialized as a new empty object. - -[source,js] --------------------------------------------------- -"state" : {} --------------------------------------------------- -// NOTCONSOLE - -===== After init_script - -This is run once on each shard before any document collection is performed, and so we will have a copy on each shard: - -Shard A:: -+ -[source,js] --------------------------------------------------- -"state" : { - "transactions" : [] -} --------------------------------------------------- -// NOTCONSOLE - -Shard B:: -+ -[source,js] --------------------------------------------------- -"state" : { - "transactions" : [] -} --------------------------------------------------- -// NOTCONSOLE - -===== After map_script - -Each shard collects its documents and runs the map_script on each document that is collected: - -Shard A:: -+ -[source,js] --------------------------------------------------- -"state" : { - "transactions" : [ 80, -30 ] -} --------------------------------------------------- -// NOTCONSOLE - -Shard B:: -+ -[source,js] --------------------------------------------------- -"state" : { - "transactions" : [ -10, 130 ] -} --------------------------------------------------- -// NOTCONSOLE - -===== After combine_script - -The combine_script is executed on each shard after document collection is complete and reduces all the transactions down to a single profit figure for each -shard (by summing the values in the transactions array) which is passed back to the coordinating node: - -Shard A:: 50 -Shard B:: 120 - -===== After reduce_script - -The reduce_script receives a `states` array containing the result of the combine script for each shard: - -[source,js] --------------------------------------------------- -"states" : [ - 50, - 120 -] --------------------------------------------------- -// NOTCONSOLE - -It 
reduces the responses for the shards down to a final overall profit figure (by summing the values) and returns this as the result of the aggregation to -produce the response: - -[source,js] --------------------------------------------------- -{ - ... - - "aggregations": { - "profit": { - "value": 170 - } - } -} --------------------------------------------------- -// NOTCONSOLE - -[[scripted-metric-aggregation-parameters]] -==== Other parameters - -[horizontal] -params:: Optional. An object whose contents will be passed as variables to the `init_script`, `map_script` and `combine_script`. This can be - useful to allow the user to control the behavior of the aggregation and for storing state between the scripts. If this is not specified, - the default is the equivalent of providing: -+ -[source,js] --------------------------------------------------- -"params" : {} --------------------------------------------------- -// NOTCONSOLE - -[[scripted-metric-aggregation-empty-buckets]] -==== Empty buckets - -If a parent bucket of the scripted metric aggregation does not collect any documents an empty aggregation response will be returned from the -shard with a `null` value. In this case the `reduce_script`'s `states` variable will contain `null` as a response from that shard. -`reduce_script`'s should therefore expect and deal with `null` responses from shards. diff --git a/docs/reference/aggregations/metrics/stats-aggregation.asciidoc b/docs/reference/aggregations/metrics/stats-aggregation.asciidoc deleted file mode 100644 index 7e6ceefa5f651..0000000000000 --- a/docs/reference/aggregations/metrics/stats-aggregation.asciidoc +++ /dev/null @@ -1,117 +0,0 @@ -[[search-aggregations-metrics-stats-aggregation]] -=== Stats aggregation -++++ -Stats -++++ - -A `multi-value` metrics aggregation that computes stats over numeric values extracted from the aggregated documents. - -The stats that are returned consist of: `min`, `max`, `sum`, `count` and `avg`. - -Assuming the data consists of documents representing exams grades (between 0 and 100) of students - -[source,console] --------------------------------------------------- -POST /exams/_search?size=0 -{ - "aggs": { - "grades_stats": { "stats": { "field": "grade" } } - } -} --------------------------------------------------- -// TEST[setup:exams] - -The above aggregation computes the grades statistics over all documents. The aggregation type is `stats` and the `field` setting defines the numeric field of the documents the stats will be computed on. The above will return the following: - - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "grades_stats": { - "count": 2, - "min": 50.0, - "max": 100.0, - "avg": 75.0, - "sum": 150.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -The name of the aggregation (`grades_stats` above) also serves as the key by which the aggregation result can be retrieved from the returned response. - -==== Script - -If you need to get the `stats` for something more complex than a single field, -run the aggregation on a <>. 
- -[source,console] --------------------------------------------------- -POST /exams/_search -{ - "size": 0, - "runtime_mappings": { - "grade.weighted": { - "type": "double", - "script": """ - emit(doc['grade'].value * doc['weight'].value) - """ - } - }, - "aggs": { - "grades_stats": { - "stats": { - "field": "grade.weighted" - } - } - } -} --------------------------------------------------- -// TEST[setup:exams] -// TEST[s/_search/_search?filter_path=aggregations/] - -//// -[source,console-result] --------------------------------------------------- -{ - "aggregations": { - "grades_stats": { - "count": 2, - "min": 150.0, - "max": 200.0, - "avg": 175.0, - "sum": 350.0 - } - } -} --------------------------------------------------- -//// - - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. -By default they will be ignored but it is also possible to treat them as if they -had a value. - -[source,console] --------------------------------------------------- -POST /exams/_search?size=0 -{ - "aggs": { - "grades_stats": { - "stats": { - "field": "grade", - "missing": 0 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:exams] - -<1> Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `0`. diff --git a/docs/reference/aggregations/metrics/string-stats-aggregation.asciidoc b/docs/reference/aggregations/metrics/string-stats-aggregation.asciidoc deleted file mode 100644 index f6d3e66d24d51..0000000000000 --- a/docs/reference/aggregations/metrics/string-stats-aggregation.asciidoc +++ /dev/null @@ -1,193 +0,0 @@ -[role="xpack"] -[[search-aggregations-metrics-string-stats-aggregation]] -=== String stats aggregation -++++ -String stats -++++ - -A `multi-value` metrics aggregation that computes statistics over string values extracted from the aggregated documents. -These values can be retrieved either from specific `keyword` fields. - -The string stats aggregation returns the following results: - -* `count` - The number of non-empty fields counted. -* `min_length` - The length of the shortest term. -* `max_length` - The length of the longest term. -* `avg_length` - The average length computed over all terms. -* `entropy` - The {wikipedia}/Entropy_(information_theory)[Shannon Entropy] value computed over all terms collected by -the aggregation. Shannon entropy quantifies the amount of information contained in the field. It is a very useful metric for -measuring a wide range of properties of a data set, such as diversity, similarity, randomness etc. - -For example: - -[source,console] --------------------------------------------------- -POST /my-index-000001/_search?size=0 -{ - "aggs": { - "message_stats": { "string_stats": { "field": "message.keyword" } } - } -} --------------------------------------------------- -// TEST[setup:messages] - -The above aggregation computes the string statistics for the `message` field in all documents. The aggregation type -is `string_stats` and the `field` parameter defines the field of the documents the stats will be computed on. -The above will return the following: - -[source,console-result] --------------------------------------------------- -{ - ... 
- - "aggregations": { - "message_stats": { - "count": 5, - "min_length": 24, - "max_length": 30, - "avg_length": 28.8, - "entropy": 3.94617750050791 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -The name of the aggregation (`message_stats` above) also serves as the key by which the aggregation result can be retrieved from -the returned response. - -==== Character distribution - -The computation of the Shannon Entropy value is based on the probability of each character appearing in all terms collected -by the aggregation. To view the probability distribution for all characters, we can add the `show_distribution` (default: `false`) parameter. - -[source,console] --------------------------------------------------- -POST /my-index-000001/_search?size=0 -{ - "aggs": { - "message_stats": { - "string_stats": { - "field": "message.keyword", - "show_distribution": true <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:messages] - -<1> Set the `show_distribution` parameter to `true`, so that probability distribution for all characters is returned in the results. - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "message_stats": { - "count": 5, - "min_length": 24, - "max_length": 30, - "avg_length": 28.8, - "entropy": 3.94617750050791, - "distribution": { - " ": 0.1527777777777778, - "e": 0.14583333333333334, - "s": 0.09722222222222222, - "m": 0.08333333333333333, - "t": 0.0763888888888889, - "h": 0.0625, - "a": 0.041666666666666664, - "i": 0.041666666666666664, - "r": 0.041666666666666664, - "g": 0.034722222222222224, - "n": 0.034722222222222224, - "o": 0.034722222222222224, - "u": 0.034722222222222224, - "b": 0.027777777777777776, - "w": 0.027777777777777776, - "c": 0.013888888888888888, - "E": 0.006944444444444444, - "l": 0.006944444444444444, - "1": 0.006944444444444444, - "2": 0.006944444444444444, - "3": 0.006944444444444444, - "4": 0.006944444444444444, - "y": 0.006944444444444444 - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -The `distribution` object shows the probability of each character appearing in all terms. The characters are sorted by descending probability. - -==== Script - -If you need to get the `string_stats` for something more complex than a single -field, run the aggregation on a <>. - -[source,console] ----- -POST /my-index-000001/_search -{ - "size": 0, - "runtime_mappings": { - "message_and_context": { - "type": "keyword", - "script": """ - emit(doc['message.keyword'].value + ' ' + doc['context.keyword'].value) - """ - } - }, - "aggs": { - "message_stats": { - "string_stats": { "field": "message_and_context" } - } - } -} ----- -// TEST[setup:messages] -// TEST[s/_search/_search?filter_path=aggregations/] - -//// -[source,console-result] ----- -{ - "aggregations": { - "message_stats": { - "count": 5, - "min_length": 28, - "max_length": 34, - "avg_length": 32.8, - "entropy": 3.9797778402765784 - } - } -} ----- -//// - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should be treated. -By default they will be ignored but it is also possible to treat them as if they had a value. 
- -[source,console] --------------------------------------------------- -POST /my-index-000001/_search?size=0 -{ - "aggs": { - "message_stats": { - "string_stats": { - "field": "message.keyword", - "missing": "[empty message]" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:messages] - -<1> Documents without a value in the `message` field will be treated as documents that have the value `[empty message]`. diff --git a/docs/reference/aggregations/metrics/sum-aggregation.asciidoc b/docs/reference/aggregations/metrics/sum-aggregation.asciidoc deleted file mode 100644 index e326de11ef00c..0000000000000 --- a/docs/reference/aggregations/metrics/sum-aggregation.asciidoc +++ /dev/null @@ -1,191 +0,0 @@ -[[search-aggregations-metrics-sum-aggregation]] -=== Sum aggregation -++++ -Sum -++++ - -A `single-value` metrics aggregation that sums up numeric values that are extracted from the aggregated documents. -These values can be extracted either from specific numeric or <> fields. - -Assuming the data consists of documents representing sales records we can sum -the sale price of all hats with: - -[source,console] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "query": { - "constant_score": { - "filter": { - "match": { "type": "hat" } - } - } - }, - "aggs": { - "hat_prices": { "sum": { "field": "price" } } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Resulting in: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "hat_prices": { - "value": 450.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -The name of the aggregation (`hat_prices` above) also serves as the key by which the aggregation result can be retrieved from the returned response. - -==== Script - -If you need to get the `sum` for something more complex than a single -field, run the aggregation on a <>. - -[source,console] ----- -POST /sales/_search?size=0 -{ - "runtime_mappings": { - "price.weighted": { - "type": "double", - "script": """ - double price = doc['price'].value; - if (doc['promoted'].value) { - price *= 0.8; - } - emit(price); - """ - } - }, - "query": { - "constant_score": { - "filter": { - "match": { "type": "hat" } - } - } - }, - "aggs": { - "hat_prices": { - "sum": { - "field": "price.weighted" - } - } - } -} ----- -// TEST[setup:sales] -// TEST[s/size=0/size=0&filter_path=aggregations/] - -//// -[source,console-result] ----- -{ - "aggregations": { - "hat_prices": { - "value": 370.0 - } - } -} ----- -//// - -==== Missing value - -The `missing` parameter defines how documents that are missing a value should -be treated. By default documents missing the value will be ignored but it is -also possible to treat them as if they had a value. For example, this treats -all hat sales without a price as being `100`. 
- -[source,console] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "query": { - "constant_score": { - "filter": { - "match": { "type": "hat" } - } - } - }, - "aggs": { - "hat_prices": { - "sum": { - "field": "price", - "missing": 100 <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -[[search-aggregations-metrics-sum-aggregation-histogram-fields]] -==== Histogram fields - -When sum is computed on <>, the result of the aggregation is the sum of all elements in the `values` -array multiplied by the number in the same position in the `counts` array. - -For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks: - -[source,console] --------------------------------------------------- -PUT metrics_index -{ - "mappings": { - "properties": { - "latency_histo": { "type": "histogram" } - } - } -} - -PUT metrics_index/_doc/1?refresh -{ - "network.name" : "net-1", - "latency_histo" : { - "values" : [0.1, 0.2, 0.3, 0.4, 0.5], - "counts" : [3, 7, 23, 12, 6] - } -} - -PUT metrics_index/_doc/2?refresh -{ - "network.name" : "net-2", - "latency_histo" : { - "values" : [0.1, 0.2, 0.3, 0.4, 0.5], - "counts" : [8, 17, 8, 7, 6] - } -} - -POST /metrics_index/_search?size=0&filter_path=aggregations -{ - "aggs" : { - "total_latency" : { "sum" : { "field" : "latency_histo" } } - } -} --------------------------------------------------- - -For each histogram field, the `sum` aggregation will add each number in the -`values` array, multiplied by its associated count in the `counts` array. - -Eventually, it will add all values for all histograms and return the following -result: - -[source,console-result] --------------------------------------------------- -{ - "aggregations": { - "total_latency": { - "value": 28.8 - } - } -} --------------------------------------------------- diff --git a/docs/reference/aggregations/metrics/t-test-aggregation.asciidoc b/docs/reference/aggregations/metrics/t-test-aggregation.asciidoc deleted file mode 100644 index 0bd863cc53600..0000000000000 --- a/docs/reference/aggregations/metrics/t-test-aggregation.asciidoc +++ /dev/null @@ -1,192 +0,0 @@ -[role="xpack"] -[[search-aggregations-metrics-ttest-aggregation]] -=== T-test aggregation -++++ -T-test -++++ - -A `t_test` metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a Student's t-distribution -under the null hypothesis on numeric values extracted from the aggregated documents. In practice, this -will tell you if the difference between two population means are statistically significant and did not occur by chance alone. - -==== Syntax - -A `t_test` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "t_test": { - "a": "value_before", - "b": "value_after", - "type": "paired" - } -} --------------------------------------------------- -// NOTCONSOLE - -Assuming that we have a record of node start up times before and after upgrade, let's look at a t-test to see if upgrade affected -the node start up time in a meaningful way. 
- -[source,console] --------------------------------------------------- -GET node_upgrade/_search -{ - "size": 0, - "aggs": { - "startup_time_ttest": { - "t_test": { - "a": { "field": "startup_time_before" }, <1> - "b": { "field": "startup_time_after" }, <2> - "type": "paired" <3> - } - } - } -} --------------------------------------------------- -// TEST[setup:node_upgrade] -<1> The field `startup_time_before` must be a numeric field. -<2> The field `startup_time_after` must be a numeric field. -<3> Since we have data from the same nodes, we are using paired t-test. - -The response will return the p-value or probability value for the test. It is the probability of obtaining results at least as extreme as -the result processed by the aggregation, assuming that the null hypothesis is correct (which means there is no difference between -population means). Smaller p-value means the null hypothesis is more likely to be incorrect and population means are indeed different. - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "startup_time_ttest": { - "value": 0.1914368843365979 <1> - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -<1> The p-value. - -==== T-Test Types - -The `t_test` aggregation supports unpaired and paired two-sample t-tests. The type of the test can be specified using the `type` parameter: - -`"type": "paired"`:: performs paired t-test -`"type": "homoscedastic"`:: performs two-sample equal variance test -`"type": "heteroscedastic"`:: performs two-sample unequal variance test (this is default) - -==== Filters - -It is also possible to run unpaired t-test on different sets of records using filters. For example, if we want to test the difference -of startup times before upgrade between two different groups of nodes, we use the same field `startup_time_before` by separate groups of -nodes using terms filters on the group name field: - -[source,console] --------------------------------------------------- -GET node_upgrade/_search -{ - "size": 0, - "aggs": { - "startup_time_ttest": { - "t_test": { - "a": { - "field": "startup_time_before", <1> - "filter": { - "term": { - "group": "A" <2> - } - } - }, - "b": { - "field": "startup_time_before", <3> - "filter": { - "term": { - "group": "B" <4> - } - } - }, - "type": "heteroscedastic" <5> - } - } - } -} --------------------------------------------------- -// TEST[setup:node_upgrade] -<1> The field `startup_time_before` must be a numeric field. -<2> Any query that separates two groups can be used here. -<3> We are using the same field -<4> but we are using different filters. -<5> Since we have data from different nodes, we cannot use paired t-test. - - -[source,console-result] --------------------------------------------------- -{ - ... - - "aggregations": { - "startup_time_ttest": { - "value": 0.2981858007281437 <1> - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -<1> The p-value. - -Populations don't have to be in the same index. If data sets are located in different -indices, the term filter on the <> field can be used to select populations. - -==== Script - -If you need to run the `t_test` on values that aren't represented cleanly -by a field you should, run the aggregation on a <>. 
-For example, if you want to adjust out load times for the before values: - -[source,console] ----- -GET node_upgrade/_search -{ - "size": 0, - "runtime_mappings": { - "startup_time_before.adjusted": { - "type": "long", - "script": { - "source": "emit(doc['startup_time_before'].value - params.adjustment)", - "params": { - "adjustment": 10 - } - } - } - }, - "aggs": { - "startup_time_ttest": { - "t_test": { - "a": { - "field": "startup_time_before.adjusted" - }, - "b": { - "field": "startup_time_after" - }, - "type": "paired" - } - } - } -} ----- -// TEST[setup:node_upgrade] -// TEST[s/_search/_search?filter_path=aggregations/] - -//// -[source,console-result] ----- -{ - "aggregations": { - "startup_time_ttest": { - "value": 0.9397399375119482 - } - } -} ----- -//// diff --git a/docs/reference/aggregations/metrics/top-metrics-aggregation.asciidoc b/docs/reference/aggregations/metrics/top-metrics-aggregation.asciidoc deleted file mode 100644 index 5098c9cedacca..0000000000000 --- a/docs/reference/aggregations/metrics/top-metrics-aggregation.asciidoc +++ /dev/null @@ -1,531 +0,0 @@ -[role="xpack"] -[[search-aggregations-metrics-top-metrics]] -=== Top metrics aggregation -++++ -Top metrics -++++ - -The `top_metrics` aggregation selects metrics from the document with the largest or smallest "sort" -value. For example, this gets the value of the `m` field on the document with the largest value of `s`: - -[source,console,id=search-aggregations-metrics-top-metrics-simple] ----- -POST /test/_bulk?refresh -{"index": {}} -{"s": 1, "m": 3.1415} -{"index": {}} -{"s": 2, "m": 1.0} -{"index": {}} -{"s": 3, "m": 2.71828} -POST /test/_search?filter_path=aggregations -{ - "aggs": { - "tm": { - "top_metrics": { - "metrics": {"field": "m"}, - "sort": {"s": "desc"} - } - } - } -} ----- - -Which returns: - -[source,js] ----- -{ - "aggregations": { - "tm": { - "top": [ {"sort": [3], "metrics": {"m": 2.718280076980591 } } ] - } - } -} ----- -// TESTRESPONSE - -`top_metrics` is fairly similar to <> -in spirit but because it is more limited it is able to do its job using less memory and is often -faster. - -==== `sort` - -The `sort` field in the metric request functions exactly the same as the `sort` field in the -<> request except: - -* It can't be used on <>, <>, <>, - <>, or <> fields. -* It only supports a single sort value so which document wins ties is not specified. - -The metrics that the aggregation returns is the first hit that would be returned by the search -request. So, - -`"sort": {"s": "desc"}`:: gets metrics from the document with the highest `s` -`"sort": {"s": "asc"}`:: gets the metrics from the document with the lowest `s` -`"sort": {"_geo_distance": {"location": "POINT (-78.6382 35.7796)"}}`:: - gets metrics from the documents with `location` *closest* to `35.7796, -78.6382` -`"sort": "_score"`:: gets metrics from the document with the highest score - -==== `metrics` - -`metrics` selects the fields of the "top" document to return. You can request -a single metric with something like `"metrics": {"field": "m"}` or multiple -metrics by requesting a list of metrics like `"metrics": [{"field": "m"}, {"field": "i"}`. - -`metrics.field` supports the following field types: - -* <> -* <> -* <> -* <> - -Except for keywords, <> for corresponding types are also -supported. `metrics.field` doesn't support fields with <>. A -`top_metric` aggregation on array values may return inconsistent results. - -The following example runs a `top_metrics` aggregation on several field types. 
- -[source,console,id=search-aggregations-metrics-top-metrics-list-of-metrics] ----- -PUT /test -{ - "mappings": { - "properties": { - "d": {"type": "date"} - } - } -} -POST /test/_bulk?refresh -{"index": {}} -{"s": 1, "m": 3.1415, "i": 1, "d": "2020-01-01T00:12:12Z", "t": "cat"} -{"index": {}} -{"s": 2, "m": 1.0, "i": 6, "d": "2020-01-02T00:12:12Z", "t": "dog"} -{"index": {}} -{"s": 3, "m": 2.71828, "i": -12, "d": "2019-12-31T00:12:12Z", "t": "chicken"} -POST /test/_search?filter_path=aggregations -{ - "aggs": { - "tm": { - "top_metrics": { - "metrics": [ - {"field": "m"}, - {"field": "i"}, - {"field": "d"}, - {"field": "t.keyword"} - ], - "sort": {"s": "desc"} - } - } - } -} ----- - -Which returns: - -[source,js] ----- -{ - "aggregations": { - "tm": { - "top": [ { - "sort": [3], - "metrics": { - "m": 2.718280076980591, - "i": -12, - "d": "2019-12-31T00:12:12.000Z", - "t.keyword": "chicken" - } - } ] - } - } -} ----- -// TESTRESPONSE - - -==== `missing` - -The `missing` parameter defines how documents with a missing value are treated. -By default, if any of the key components are missing, the entire document is -ignored. It is possible to treat the missing components as if they had a value -by using the `missing` parameter. - -[source,console] ----- -PUT /my-index -{ - "mappings": { - "properties": { - "nr": { "type": "integer" }, - "state": { "type": "keyword" } <1> - } - } -} -POST /my-index/_bulk?refresh -{"index": {}} -{"nr": 1, "state": "started"} -{"index": {}} -{"nr": 2, "state": "stopped"} -{"index": {}} -{"nr": 3, "state": "N/A"} -{"index": {}} -{"nr": 4} <2> -POST /my-index/_search?filter_path=aggregations -{ - "aggs": { - "my_top_metrics": { - "top_metrics": { - "metrics": { - "field": "state", - "missing": "N/A"}, <3> - "sort": {"nr": "desc"} - } - } - } -} ----- - -<1> If you want to use an aggregation on textual content, it must be a `keyword` -type field or you must enable fielddata on that field. -<2> This document has a missing `state` field value. -<3> The `missing` parameter defines that if `state` field has a missing value, -it should be treated as if it had the `N/A` value. - -The request results in the following response: - -[source,console-result] ----- -{ - "aggregations": { - "my_top_metrics": { - "top": [ - { - "sort": [ - 4 - ], - "metrics": { - "state": "N/A" - } - } - ] - } - } -} ----- - - -==== `size` - -`top_metrics` can return the top few document's worth of metrics using the size parameter: - -[source,console,id=search-aggregations-metrics-top-metrics-size] ----- -POST /test/_bulk?refresh -{"index": {}} -{"s": 1, "m": 3.1415} -{"index": {}} -{"s": 2, "m": 1.0} -{"index": {}} -{"s": 3, "m": 2.71828} -POST /test/_search?filter_path=aggregations -{ - "aggs": { - "tm": { - "top_metrics": { - "metrics": {"field": "m"}, - "sort": {"s": "desc"}, - "size": 3 - } - } - } -} ----- - -Which returns: - -[source,js] ----- -{ - "aggregations": { - "tm": { - "top": [ - {"sort": [3], "metrics": {"m": 2.718280076980591 } }, - {"sort": [2], "metrics": {"m": 1.0 } }, - {"sort": [1], "metrics": {"m": 3.1414999961853027 } } - ] - } - } -} ----- -// TESTRESPONSE - -The default `size` is 1. The maximum default size is `10` because the aggregation's -working storage is "dense", meaning we allocate `size` slots for every bucket. `10` -is a *very* conservative default maximum and you can raise it if you need to by -changing the `top_metrics_max_size` index setting. 
But know that large sizes can -take a fair bit of memory, especially if they are inside of an aggregation which -makes many buckes like a large -<>. If -you till want to raise it, use something like: - -[source,console] ----- -PUT /test/_settings -{ - "top_metrics_max_size": 100 -} ----- -// TEST[continued] - -NOTE: If `size` is more than `1` the `top_metrics` aggregation can't be the *target* of a sort. - -==== Examples - -[[search-aggregations-metrics-top-metrics-example-terms]] -===== Use with terms - -This aggregation should be quite useful inside of <> -aggregation, to, say, find the last value reported by each server. - -[source,console,id=search-aggregations-metrics-top-metrics-terms] ----- -PUT /node -{ - "mappings": { - "properties": { - "ip": {"type": "ip"}, - "date": {"type": "date"} - } - } -} -POST /node/_bulk?refresh -{"index": {}} -{"ip": "192.168.0.1", "date": "2020-01-01T01:01:01", "m": 1} -{"index": {}} -{"ip": "192.168.0.1", "date": "2020-01-01T02:01:01", "m": 2} -{"index": {}} -{"ip": "192.168.0.2", "date": "2020-01-01T02:01:01", "m": 3} -POST /node/_search?filter_path=aggregations -{ - "aggs": { - "ip": { - "terms": { - "field": "ip" - }, - "aggs": { - "tm": { - "top_metrics": { - "metrics": {"field": "m"}, - "sort": {"date": "desc"} - } - } - } - } - } -} ----- - -Which returns: - -[source,js] ----- -{ - "aggregations": { - "ip": { - "buckets": [ - { - "key": "192.168.0.1", - "doc_count": 2, - "tm": { - "top": [ {"sort": ["2020-01-01T02:01:01.000Z"], "metrics": {"m": 2 } } ] - } - }, - { - "key": "192.168.0.2", - "doc_count": 1, - "tm": { - "top": [ {"sort": ["2020-01-01T02:01:01.000Z"], "metrics": {"m": 3 } } ] - } - } - ], - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0 - } - } -} ----- -// TESTRESPONSE - -Unlike `top_hits`, you can sort buckets by the results of this metric: - -[source,console] ----- -POST /node/_search?filter_path=aggregations -{ - "aggs": { - "ip": { - "terms": { - "field": "ip", - "order": {"tm.m": "desc"} - }, - "aggs": { - "tm": { - "top_metrics": { - "metrics": {"field": "m"}, - "sort": {"date": "desc"} - } - } - } - } - } -} ----- -// TEST[continued] - -Which returns: - -[source,js] ----- -{ - "aggregations": { - "ip": { - "buckets": [ - { - "key": "192.168.0.2", - "doc_count": 1, - "tm": { - "top": [ {"sort": ["2020-01-01T02:01:01.000Z"], "metrics": {"m": 3 } } ] - } - }, - { - "key": "192.168.0.1", - "doc_count": 2, - "tm": { - "top": [ {"sort": ["2020-01-01T02:01:01.000Z"], "metrics": {"m": 2 } } ] - } - } - ], - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0 - } - } -} ----- -// TESTRESPONSE - -===== Mixed sort types - -Sorting `top_metrics` by a field that has different types across different -indices producs somewhat surprising results: floating point fields are -always sorted independently of whole numbered fields. - -[source,console,id=search-aggregations-metrics-top-metrics-mixed-sort] ----- -POST /test/_bulk?refresh -{"index": {"_index": "test1"}} -{"s": 1, "m": 3.1415} -{"index": {"_index": "test1"}} -{"s": 2, "m": 1} -{"index": {"_index": "test2"}} -{"s": 3.1, "m": 2.71828} -POST /test*/_search?filter_path=aggregations -{ - "aggs": { - "tm": { - "top_metrics": { - "metrics": {"field": "m"}, - "sort": {"s": "asc"} - } - } - } -} ----- - -Which returns: - -[source,js] ----- -{ - "aggregations": { - "tm": { - "top": [ {"sort": [3.0999999046325684], "metrics": {"m": 2.718280076980591 } } ] - } - } -} ----- -// TESTRESPONSE - -While this is better than an error it *probably* isn't what you were going for. 
-While it does lose some precision, you can explicitly cast the whole number -fields to floating points with something like: - -[source,console] ----- -POST /test*/_search?filter_path=aggregations -{ - "aggs": { - "tm": { - "top_metrics": { - "metrics": {"field": "m"}, - "sort": {"s": {"order": "asc", "numeric_type": "double"}} - } - } - } -} ----- -// TEST[continued] - -Which returns the much more expected: - -[source,js] ----- -{ - "aggregations": { - "tm": { - "top": [ {"sort": [1.0], "metrics": {"m": 3.1414999961853027 } } ] - } - } -} ----- -// TESTRESPONSE - -===== Use in pipeline aggregations - -`top_metrics` can be used in pipeline aggregations that consume a single value per bucket, such as `bucket_selector` -that applies per bucket filtering, similar to using a HAVING clause in SQL. This requires setting `size` to 1, and -specifying the right path for the (single) metric to be passed to the wrapping aggregator. For example: - -[source,console] ----- -POST /test*/_search?filter_path=aggregations -{ - "aggs": { - "ip": { - "terms": { - "field": "ip" - }, - "aggs": { - "tm": { - "top_metrics": { - "metrics": {"field": "m"}, - "sort": {"s": "desc"}, - "size": 1 - } - }, - "having_tm": { - "bucket_selector": { - "buckets_path": { - "top_m": "tm[m]" - }, - "script": "params.top_m < 1000" - } - } - } - } - } -} ----- -// TEST[continued] - -The `bucket_path` uses the `top_metrics` name `tm` and a keyword for the metric providing the aggregate value, -namely `m`. diff --git a/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc b/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc deleted file mode 100644 index 515ad38bd814f..0000000000000 --- a/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc +++ /dev/null @@ -1,470 +0,0 @@ -[[search-aggregations-metrics-top-hits-aggregation]] -=== Top hits aggregation -++++ -Top hits -++++ - -A `top_hits` metric aggregator keeps track of the most relevant document being aggregated. This aggregator is intended -to be used as a sub aggregator, so that the top matching documents can be aggregated per bucket. - -TIP: We do not recommend using `top_hits` as a top-level aggregation. If you -want to group search hits, use the <> -parameter instead. - -The `top_hits` aggregator can effectively be used to group result sets by certain fields via a bucket aggregator. -One or more bucket aggregators determines by which properties a result set get sliced into. - -==== Options - -* `from` - The offset from the first result you want to fetch. -* `size` - The maximum number of top matching hits to return per bucket. By default the top three matching hits are returned. -* `sort` - How the top matching hits should be sorted. By default the hits are sorted by the score of the main query. - -==== Supported per hit features - -The top_hits aggregation returns regular search hits, because of this many per hit features can be supported: - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -IMPORTANT: If you *only* need `docvalue_fields`, `size`, and `sort` then -<> might be a more efficient choice than the Top Hits Aggregation. - -`top_hits` does not support the <> parameter. Query rescoring -applies only to search hits, not aggregation results. To change the scores used -by aggregations, use a <> or -<> query. - -==== Example - -In the following example we group the sales by type and per type we show the last sale. -For each sale only the date and price fields are being included in the source. 
- -[source,console] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "top_tags": { - "terms": { - "field": "type", - "size": 3 - }, - "aggs": { - "top_sales_hits": { - "top_hits": { - "sort": [ - { - "date": { - "order": "desc" - } - } - ], - "_source": { - "includes": [ "date", "price" ] - }, - "size": 1 - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Possible response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "top_tags": { - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0, - "buckets": [ - { - "key": "hat", - "doc_count": 3, - "top_sales_hits": { - "hits": { - "total" : { - "value": 3, - "relation": "eq" - }, - "max_score": null, - "hits": [ - { - "_index": "sales", - "_id": "AVnNBmauCQpcRyxw6ChK", - "_source": { - "date": "2015/03/01 00:00:00", - "price": 200 - }, - "sort": [ - 1425168000000 - ], - "_score": null - } - ] - } - } - }, - { - "key": "t-shirt", - "doc_count": 3, - "top_sales_hits": { - "hits": { - "total" : { - "value": 3, - "relation": "eq" - }, - "max_score": null, - "hits": [ - { - "_index": "sales", - "_id": "AVnNBmauCQpcRyxw6ChL", - "_source": { - "date": "2015/03/01 00:00:00", - "price": 175 - }, - "sort": [ - 1425168000000 - ], - "_score": null - } - ] - } - } - }, - { - "key": "bag", - "doc_count": 1, - "top_sales_hits": { - "hits": { - "total" : { - "value": 1, - "relation": "eq" - }, - "max_score": null, - "hits": [ - { - "_index": "sales", - "_id": "AVnNBmatCQpcRyxw6ChH", - "_source": { - "date": "2015/01/01 00:00:00", - "price": 150 - }, - "sort": [ - 1420070400000 - ], - "_score": null - } - ] - } - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -// TESTRESPONSE[s/AVnNBmauCQpcRyxw6ChK/$body.aggregations.top_tags.buckets.0.top_sales_hits.hits.hits.0._id/] -// TESTRESPONSE[s/AVnNBmauCQpcRyxw6ChL/$body.aggregations.top_tags.buckets.1.top_sales_hits.hits.hits.0._id/] -// TESTRESPONSE[s/AVnNBmatCQpcRyxw6ChH/$body.aggregations.top_tags.buckets.2.top_sales_hits.hits.hits.0._id/] - - -==== Field collapse example - -Field collapsing or result grouping is a feature that logically groups a result set into groups and per group returns -top documents. The ordering of the groups is determined by the relevancy of the first document in a group. In -Elasticsearch this can be implemented via a bucket aggregator that wraps a `top_hits` aggregator as sub-aggregator. - -In the example below we search across crawled webpages. For each webpage we store the body and the domain the webpage -belong to. By defining a `terms` aggregator on the `domain` field we group the result set of webpages by domain. The -`top_hits` aggregator is then defined as sub-aggregator, so that the top matching hits are collected per bucket. - -Also a `max` aggregator is defined which is used by the `terms` aggregator's order feature to return the buckets by -relevancy order of the most relevant document in a bucket. 
- -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "query": { - "match": { - "body": "elections" - } - }, - "aggs": { - "top_sites": { - "terms": { - "field": "domain", - "order": { - "top_hit": "desc" - } - }, - "aggs": { - "top_tags_hits": { - "top_hits": {} - }, - "top_hit" : { - "max": { - "script": { - "source": "_score" - } - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -At the moment the `max` (or `min`) aggregator is needed to make sure the buckets from the `terms` aggregator are -ordered according to the score of the most relevant webpage per domain. Unfortunately the `top_hits` aggregator -can't be used in the `order` option of the `terms` aggregator yet. - -==== top_hits support in a nested or reverse_nested aggregator - -If the `top_hits` aggregator is wrapped in a `nested` or `reverse_nested` aggregator then nested hits are being returned. -Nested hits are in a sense hidden mini documents that are part of regular document where in the mapping a nested field type -has been configured. The `top_hits` aggregator has the ability to un-hide these documents if it is wrapped in a `nested` -or `reverse_nested` aggregator. Read more about nested in the <>. - -If nested type has been configured a single document is actually indexed as multiple Lucene documents and they share -the same id. In order to determine the identity of a nested hit there is more needed than just the id, so that is why -nested hits also include their nested identity. The nested identity is kept under the `_nested` field in the search hit -and includes the array field and the offset in the array field the nested hit belongs to. The offset is zero based. - -Let's see how it works with a real sample. Considering the following mapping: - -[source,console] --------------------------------------------------- -PUT /sales -{ - "mappings": { - "properties": { - "tags": { "type": "keyword" }, - "comments": { <1> - "type": "nested", - "properties": { - "username": { "type": "keyword" }, - "comment": { "type": "text" } - } - } - } - } -} --------------------------------------------------- - -<1> The `comments` is an array that holds nested documents under the `product` object. - -And some documents: - -[source,console] --------------------------------------------------- -PUT /sales/_doc/1?refresh -{ - "tags": [ "car", "auto" ], - "comments": [ - { "username": "baddriver007", "comment": "This car could have better brakes" }, - { "username": "dr_who", "comment": "Where's the autopilot? 
Can't find it" }, - { "username": "ilovemotorbikes", "comment": "This car has two extra wheels" } - ] -} --------------------------------------------------- -// TEST[continued] - -It's now possible to execute the following `top_hits` aggregation (wrapped in a `nested` aggregation): - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "query": { - "term": { "tags": "car" } - }, - "aggs": { - "by_sale": { - "nested": { - "path": "comments" - }, - "aggs": { - "by_user": { - "terms": { - "field": "comments.username", - "size": 1 - }, - "aggs": { - "by_nested": { - "top_hits": {} - } - } - } - } - } - } -} --------------------------------------------------- -// TEST[continued] -// TEST[s/_search/_search\?filter_path=aggregations.by_sale.by_user.buckets/] - -Top hits response snippet with a nested hit, which resides in the first slot of array field `comments`: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "by_sale": { - "by_user": { - "buckets": [ - { - "key": "baddriver007", - "doc_count": 1, - "by_nested": { - "hits": { - "total" : { - "value": 1, - "relation": "eq" - }, - "max_score": 0.3616575, - "hits": [ - { - "_index": "sales", - "_id": "1", - "_nested": { - "field": "comments", <1> - "offset": 0 <2> - }, - "_score": 0.3616575, - "_source": { - "comment": "This car could have better brakes", <3> - "username": "baddriver007" - } - } - ] - } - } - } - ... - ] - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.//] - -<1> Name of the array field containing the nested hit -<2> Position if the nested hit in the containing array -<3> Source of the nested hit - -If `_source` is requested then just the part of the source of the nested object is returned, not the entire source of the document. -Also stored fields on the *nested* inner object level are accessible via `top_hits` aggregator residing in a `nested` or `reverse_nested` aggregator. - -Only nested hits will have a `_nested` field in the hit, non nested (regular) hits will not have a `_nested` field. - -The information in `_nested` can also be used to parse the original source somewhere else if `_source` isn't enabled. - -If there are multiple levels of nested object types defined in mappings then the `_nested` information can also be hierarchical -in order to express the identity of nested hits that are two layers deep or more. - -In the example below a nested hit resides in the first slot of the field `nested_grand_child_field` which then resides in -the second slow of the `nested_child_field` field: - -[source,js] --------------------------------------------------- -... -"hits": { - "total" : { - "value": 2565, - "relation": "eq" - }, - "max_score": 1, - "hits": [ - { - "_index": "a", - "_id": "1", - "_score": 1, - "_nested" : { - "field" : "nested_child_field", - "offset" : 1, - "_nested" : { - "field" : "nested_grand_child_field", - "offset" : 0 - } - } - "_source": ... - }, - ... - ] -} -... --------------------------------------------------- -// NOTCONSOLE - -==== Use in pipeline aggregations - -`top_hits` can be used in pipeline aggregations that consume a single value per bucket, such as `bucket_selector` -that applies per bucket filtering, similar to using a HAVING clause in SQL. This requires setting `size` to 1, and -specifying the right path for the value to be passed to the wrapping aggregator. The latter can be a `_source`, a -`_sort` or a `_score` value. 
For example: - -[source,console] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs": { - "top_tags": { - "terms": { - "field": "type", - "size": 3 - }, - "aggs": { - "top_sales_hits": { - "top_hits": { - "sort": [ - { - "date": { - "order": "desc" - } - } - ], - "_source": { - "includes": [ "date", "price" ] - }, - "size": 1 - } - }, - "having.top_salary": { - "bucket_selector": { - "buckets_path": { - "tp": "top_sales_hits[_source.price]" - }, - "script": "params.tp < 180" - } - } - } - } - } -} - --------------------------------------------------- -// TEST[setup:sales] - -The `bucket_path` uses the `top_hits` name `top_sales_hits` and a keyword for the field providing the aggregate value, -namely `_source` field `price` in the example above. Other options include `top_sales_hits[_sort]`, for filtering on the -sort value `date` above, and `top_sales_hits[_score]`, for filtering on the score of the top hit. diff --git a/docs/reference/aggregations/metrics/valuecount-aggregation.asciidoc b/docs/reference/aggregations/metrics/valuecount-aggregation.asciidoc deleted file mode 100644 index 50fc210ed4057..0000000000000 --- a/docs/reference/aggregations/metrics/valuecount-aggregation.asciidoc +++ /dev/null @@ -1,140 +0,0 @@ -[[search-aggregations-metrics-valuecount-aggregation]] -=== Value count aggregation -++++ -Value count -++++ - -A `single-value` metrics aggregation that counts the number of values that are extracted from the aggregated documents. -These values can be extracted either from specific fields in the documents, or be generated by a provided script. Typically, -this aggregator will be used in conjunction with other single-value aggregations. For example, when computing the `avg` -one might be interested in the number of values the average is computed over. - -`value_count` does not de-duplicate values, so even if a field has duplicates each value will be counted individually. - -[source,console] --------------------------------------------------- -POST /sales/_search?size=0 -{ - "aggs" : { - "types_count" : { "value_count" : { "field" : "type" } } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Response: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "types_count": { - "value": 7 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -The name of the aggregation (`types_count` above) also serves as the key by which the aggregation result can be -retrieved from the returned response. - -==== Script - -If you need to count something more complex than the values in a single field -you should run the aggregation on a <>. 
- -[source,console] ----- -POST /sales/_search -{ - "size": 0, - "runtime_mappings": { - "tags": { - "type": "keyword", - "script": """ - emit(doc['type'].value); - if (doc['promoted'].value) { - emit('hot'); - } - """ - } - }, - "aggs": { - "tags_count": { - "value_count": { - "field": "tags" - } - } - } -} ----- -// TEST[setup:sales] -// TEST[s/_search/_search?filter_path=aggregations/] - -//// -[source,console-result] ----- -{ - "aggregations": { - "tags_count": { - "value": 12 - } - } -} ----- -//// - -[[search-aggregations-metrics-valuecount-aggregation-histogram-fields]] -==== Histogram fields -When the `value_count` aggregation is computed on <>, the result of the aggregation is the sum of all numbers -in the `counts` array of the histogram. - -For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks: - -[source,console] --------------------------------------------------- -PUT metrics_index/_doc/1 -{ - "network.name" : "net-1", - "latency_histo" : { - "values" : [0.1, 0.2, 0.3, 0.4, 0.5], - "counts" : [3, 7, 23, 12, 6] <1> - } -} - -PUT metrics_index/_doc/2 -{ - "network.name" : "net-2", - "latency_histo" : { - "values" : [0.1, 0.2, 0.3, 0.4, 0.5], - "counts" : [8, 17, 8, 7, 6] <1> - } -} - -POST /metrics_index/_search?size=0 -{ - "aggs": { - "total_requests": { - "value_count": { "field": "latency_histo" } - } - } -} --------------------------------------------------- - -For each histogram field the `value_count` aggregation will sum all numbers in the `counts` array <1>. -Eventually, it will add all values for all histograms and return the following result: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "total_requests": { - "value": 97 - } - } -} --------------------------------------------------- -// TESTRESPONSE[skip:test not setup] diff --git a/docs/reference/aggregations/metrics/weighted-avg-aggregation.asciidoc b/docs/reference/aggregations/metrics/weighted-avg-aggregation.asciidoc deleted file mode 100644 index c209867591763..0000000000000 --- a/docs/reference/aggregations/metrics/weighted-avg-aggregation.asciidoc +++ /dev/null @@ -1,234 +0,0 @@ -[[search-aggregations-metrics-weight-avg-aggregation]] -=== Weighted avg aggregation -++++ -Weighted avg -++++ - -A `single-value` metrics aggregation that computes the weighted average of numeric values that are extracted from the aggregated documents. -These values can be extracted either from specific numeric fields in the documents. - -When calculating a regular average, each datapoint has an equal "weight" ... it contributes equally to the final value. Weighted averages, -on the other hand, weight each datapoint differently. The amount that each datapoint contributes to the final value is extracted from the -document. - -As a formula, a weighted average is the `∑(value * weight) / ∑(weight)` - -A regular average can be thought of as a weighted average where every value has an implicit weight of `1`. 
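-
-As a purely illustrative calculation (the numbers here are made up and not taken from any
-example index): two values `2` and `4` with weights `3` and `1` produce a weighted average of
-`(2*3 + 4*1) / (3 + 1) == 2.5`, whereas the regular average of the same two values is `3`.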
- -[[weighted-avg-params]] -.`weighted_avg` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`value` | The configuration for the field or script that provides the values |Required | -|`weight` | The configuration for the field or script that provides the weights |Required | -|`format` | The numeric response formatter |Optional | -|=== - -The `value` and `weight` objects have per-field specific configuration: - -[[value-params]] -.`value` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`field` | The field that values should be extracted from |Required | -|`missing` | A value to use if the field is missing entirely |Optional | -|=== - -[[weight-params]] -.`weight` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`field` | The field that weights should be extracted from |Required | -|`missing` | A weight to use if the field is missing entirely |Optional | -|=== - - -==== Examples - -If our documents have a `"grade"` field that holds a 0-100 numeric score, and a `"weight"` field which holds an arbitrary numeric weight, -we can calculate the weighted average using: - -[source,console] --------------------------------------------------- -POST /exams/_search -{ - "size": 0, - "aggs": { - "weighted_grade": { - "weighted_avg": { - "value": { - "field": "grade" - }, - "weight": { - "field": "weight" - } - } - } - } -} --------------------------------------------------- -// TEST[setup:exams] - -Which yields a response like: - -[source,console-result] --------------------------------------------------- -{ - ... - "aggregations": { - "weighted_grade": { - "value": 70.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - - -While multiple values-per-field are allowed, only one weight is allowed. If the aggregation encounters -a document that has more than one weight (e.g. the weight field is a multi-valued field) it will abort the search. -If you have this situation, you should build a <> -to combine those values into a single weight. - -This single weight will be applied independently to each value extracted from the `value` field. - -This example show how a single document with multiple values will be averaged with a single weight: - -[source,console] --------------------------------------------------- -POST /exams/_doc?refresh -{ - "grade": [1, 2, 3], - "weight": 2 -} - -POST /exams/_search -{ - "size": 0, - "aggs": { - "weighted_grade": { - "weighted_avg": { - "value": { - "field": "grade" - }, - "weight": { - "field": "weight" - } - } - } - } -} --------------------------------------------------- -// TEST - -The three values (`1`, `2`, and `3`) will be included as independent values, all with the weight of `2`: - -[source,console-result] --------------------------------------------------- -{ - ... 
- "aggregations": { - "weighted_grade": { - "value": 2.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] - -The aggregation returns `2.0` as the result, which matches what we would expect when calculating by hand: -`((1*2) + (2*2) + (3*2)) / (2+2+2) == 2` - -[[search-aggregations-metrics-weight-avg-aggregation-runtime-field]] -==== Runtime field - -If you have to sum or weigh values that don't quite line up with the indexed -values, run the aggregation on a <>. - -[source,console] ----- -POST /exams/_doc?refresh -{ - "grade": 100, - "weight": [2, 3] -} -POST /exams/_doc?refresh -{ - "grade": 80, - "weight": 3 -} - -POST /exams/_search?filter_path=aggregations -{ - "size": 0, - "runtime_mappings": { - "weight.combined": { - "type": "double", - "script": """ - double s = 0; - for (double w : doc['weight']) { - s += w; - } - emit(s); - """ - } - }, - "aggs": { - "weighted_grade": { - "weighted_avg": { - "value": { - "script": "doc.grade.value + 1" - }, - "weight": { - "field": "weight.combined" - } - } - } - } -} ----- - -Which should look like: - -[source,console-result] ----- -{ - "aggregations": { - "weighted_grade": { - "value": 93.5 - } - } -} ----- - - -==== Missing values - -By default, the aggregation excludes documents with a missing or `null` value for the `value` or `weight` field. Use the - `missing` parameter to specify a default value for these documents instead. - -[source,console] --------------------------------------------------- -POST /exams/_search -{ - "size": 0, - "aggs": { - "weighted_grade": { - "weighted_avg": { - "value": { - "field": "grade", - "missing": 2 - }, - "weight": { - "field": "weight", - "missing": 3 - } - } - } - } -} --------------------------------------------------- -// TEST[setup:exams] diff --git a/docs/reference/aggregations/pipeline.asciidoc b/docs/reference/aggregations/pipeline.asciidoc deleted file mode 100644 index 7f2054bd2554f..0000000000000 --- a/docs/reference/aggregations/pipeline.asciidoc +++ /dev/null @@ -1,322 +0,0 @@ -[[search-aggregations-pipeline]] - -== Pipeline aggregations - -Pipeline aggregations work on the outputs produced from other aggregations rather than from document sets, adding -information to the output tree. There are many different types of pipeline aggregation, each computing different information from -other aggregations, but these types can be broken down into two families: - -_Parent_:: - A family of pipeline aggregations that is provided with the output of its parent aggregation and is able - to compute new buckets or new aggregations to add to existing buckets. - -_Sibling_:: - Pipeline aggregations that are provided with the output of a sibling aggregation and are able to compute a - new aggregation which will be at the same level as the sibling aggregation. - -Pipeline aggregations can reference the aggregations they need to perform their computation by using the `buckets_path` -parameter to indicate the paths to the required metrics. The syntax for defining these paths can be found in the -<> section below. - -Pipeline aggregations cannot have sub-aggregations but depending on the type it can reference another pipeline in the `buckets_path` -allowing pipeline aggregations to be chained. For example, you can chain together two derivatives to calculate the second derivative -(i.e. a derivative of a derivative). 
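-
-To make the chaining idea concrete, the following sketch (illustrative only; the aggregation
-names are arbitrary and it reuses the `date` and `price` fields from the examples below)
-computes a second derivative by pointing one `derivative` aggregation at another through
-`buckets_path`:
-
-[source,js]
---------------------------------------------------
-POST /_search
-{
-  "aggs": {
-    "sales_per_month": {
-      "date_histogram": {
-        "field": "date",
-        "calendar_interval": "month"
-      },
-      "aggs": {
-        "sales": {
-          "sum": { "field": "price" }                       <1>
-        },
-        "sales_deriv": {
-          "derivative": { "buckets_path": "sales" }         <2>
-        },
-        "sales_2nd_deriv": {
-          "derivative": { "buckets_path": "sales_deriv" }   <3>
-        }
-      }
-    }
-  }
-}
---------------------------------------------------
-// NOTCONSOLE
-
-<1> The metric the derivatives are based on.
-<2> The first derivative references the `sales` metric in each bucket.
-<3> The second derivative is chained off the first by referencing `sales_deriv` in its `buckets_path`.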
- -NOTE: Because pipeline aggregations only add to the output, when chaining pipeline aggregations the output of each pipeline aggregation -will be included in the final output. - -[[buckets-path-syntax]] -[discrete] -=== `buckets_path` Syntax - -Most pipeline aggregations require another aggregation as their input. The input aggregation is defined via the `buckets_path` -parameter, which follows a specific format: - -// https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form -[source,ebnf] --------------------------------------------------- -AGG_SEPARATOR = `>` ; -METRIC_SEPARATOR = `.` ; -AGG_NAME = ; -METRIC = ; -MULTIBUCKET_KEY = `[]` -PATH = ? (, )* ( , ) ; --------------------------------------------------- - -For example, the path `"my_bucket>my_stats.avg"` will path to the `avg` value in the `"my_stats"` metric, which is -contained in the `"my_bucket"` bucket aggregation. - -Here are some more examples: --- - * `multi_bucket["foo"]>single_bucket>multi_metric.avg` will go to the `avg` metric in the `"multi_metric"` agg under the - single bucket `"single_bucket"` within the `"foo"` bucket of the `"multi_bucket"` multi-bucket aggregation. - * `agg1["foo"]._count` will get the `_count` metric for the `"foo"` bucket in the - multi-bucket aggregation `"multi_bucket"` --- - -Paths are relative from the position of the pipeline aggregation; they are not absolute paths, and the path cannot go back "up" the -aggregation tree. For example, this derivative is embedded inside a date_histogram and refers to a "sibling" -metric `"the_sum"`: - -[source,console,id=buckets-path-example] --------------------------------------------------- -POST /_search -{ - "aggs": { - "my_date_histo": { - "date_histogram": { - "field": "timestamp", - "calendar_interval": "day" - }, - "aggs": { - "the_sum": { - "sum": { "field": "lemmings" } <1> - }, - "the_deriv": { - "derivative": { "buckets_path": "the_sum" } <2> - } - } - } - } -} --------------------------------------------------- - -<1> The metric is called `"the_sum"` -<2> The `buckets_path` refers to the metric via a relative path `"the_sum"` - -`buckets_path` is also used for Sibling pipeline aggregations, where the aggregation is "next" to a series of buckets -instead of embedded "inside" them. For example, the `max_bucket` aggregation uses the `buckets_path` to specify -a metric embedded inside a sibling aggregation: - -[source,console,id=buckets-path-sibling-example] --------------------------------------------------- -POST /_search -{ - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - } - } - }, - "max_monthly_sales": { - "max_bucket": { - "buckets_path": "sales_per_month>sales" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `buckets_path` instructs this max_bucket aggregation that we want the maximum value of the `sales` aggregation in the -`sales_per_month` date histogram. - -If a Sibling pipeline agg references a multi-bucket aggregation, such as a `terms` agg, it also has the option to -select specific keys from the multi-bucket. 
For example, a `bucket_script` could select two specific buckets (via -their bucket keys) to perform the calculation: - -[source,console,id=buckets-path-specific-bucket-example] --------------------------------------------------- -POST /_search -{ - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sale_type": { - "terms": { - "field": "type" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - } - } - }, - "hat_vs_bag_ratio": { - "bucket_script": { - "buckets_path": { - "hats": "sale_type['hat']>sales", <1> - "bags": "sale_type['bag']>sales" <1> - }, - "script": "params.hats / params.bags" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `buckets_path` selects the hats and bags buckets (via `['hat']`/`['bag']``) to use in the script specifically, -instead of fetching all the buckets from `sale_type` aggregation - -[discrete] -=== Special Paths - -Instead of pathing to a metric, `buckets_path` can use a special `"_count"` path. This instructs -the pipeline aggregation to use the document count as its input. For example, a derivative can be calculated -on the document count of each bucket, instead of a specific metric: - -[source,console,id=buckets-path-count-example] --------------------------------------------------- -POST /_search -{ - "aggs": { - "my_date_histo": { - "date_histogram": { - "field": "timestamp", - "calendar_interval": "day" - }, - "aggs": { - "the_deriv": { - "derivative": { "buckets_path": "_count" } <1> - } - } - } - } -} --------------------------------------------------- - -<1> By using `_count` instead of a metric name, we can calculate the derivative of document counts in the histogram - -The `buckets_path` can also use `"_bucket_count"` and path to a multi-bucket aggregation to use the number of buckets -returned by that aggregation in the pipeline aggregation instead of a metric. For example, a `bucket_selector` can be -used here to filter out buckets which contain no buckets for an inner terms aggregation: - -[source,console,id=buckets-path-bucket-count-example] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "histo": { - "date_histogram": { - "field": "date", - "calendar_interval": "day" - }, - "aggs": { - "categories": { - "terms": { - "field": "category" - } - }, - "min_bucket_selector": { - "bucket_selector": { - "buckets_path": { - "count": "categories._bucket_count" <1> - }, - "script": { - "source": "params.count != 0" - } - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> By using `_bucket_count` instead of a metric name, we can filter out `histo` buckets where they contain no buckets -for the `categories` aggregation - -[[dots-in-agg-names]] -[discrete] -=== Dealing with dots in agg names - -An alternate syntax is supported to cope with aggregations or metrics which -have dots in the name, such as the ++99.9++th -<>. This metric -may be referred to as: - -[source,js] ---------------- -"buckets_path": "my_percentile[99.9]" ---------------- -// NOTCONSOLE - -[[gap-policy]] -[discrete] -=== Dealing with gaps in the data - -Data in the real world is often noisy and sometimes contains *gaps* -- places where data simply doesn't exist. 
This can -occur for a variety of reasons, the most common being: - -* Documents falling into a bucket do not contain a required field -* There are no documents matching the query for one or more buckets -* The metric being calculated is unable to generate a value, likely because another dependent bucket is missing a value. -Some pipeline aggregations have specific requirements that must be met (e.g. a derivative cannot calculate a metric for the -first value because there is no previous value, HoltWinters moving average need "warmup" data to begin calculating, etc) - -Gap policies are a mechanism to inform the pipeline aggregation about the desired behavior when "gappy" or missing -data is encountered. All pipeline aggregations accept the `gap_policy` parameter. There are currently two gap policies -to choose from: - -_skip_:: - This option treats missing data as if the bucket does not exist. It will skip the bucket and continue - calculating using the next available value. - -_insert_zeros_:: - This option will replace missing values with a zero (`0`) and pipeline aggregation computation will - proceed as normal. - -_keep_values_:: - This option is similar to skip, except if the metric provides a non-null, non-NaN value this value is - used, otherwise the empty bucket is skipped. - -include::pipeline/avg-bucket-aggregation.asciidoc[] - -include::pipeline/bucket-script-aggregation.asciidoc[] - -include::pipeline/bucket-count-ks-test-aggregation.asciidoc[] - -include::pipeline/bucket-correlation-aggregation.asciidoc[] - -include::pipeline/bucket-selector-aggregation.asciidoc[] - -include::pipeline/bucket-sort-aggregation.asciidoc[] - -include::pipeline/change-point-aggregation.asciidoc[] - -include::pipeline/cumulative-cardinality-aggregation.asciidoc[] - -include::pipeline/cumulative-sum-aggregation.asciidoc[] - -include::pipeline/derivative-aggregation.asciidoc[] - -include::pipeline/extended-stats-bucket-aggregation.asciidoc[] - -include::pipeline/inference-bucket-aggregation.asciidoc[] - -include::pipeline/max-bucket-aggregation.asciidoc[] - -include::pipeline/min-bucket-aggregation.asciidoc[] - -include::pipeline/movfn-aggregation.asciidoc[] - -include::pipeline/moving-percentiles-aggregation.asciidoc[] - -include::pipeline/normalize-aggregation.asciidoc[] - -include::pipeline/percentiles-bucket-aggregation.asciidoc[] - -include::pipeline/serial-diff-aggregation.asciidoc[] - -include::pipeline/stats-bucket-aggregation.asciidoc[] - -include::pipeline/sum-bucket-aggregation.asciidoc[] diff --git a/docs/reference/aggregations/pipeline/avg-bucket-aggregation.asciidoc b/docs/reference/aggregations/pipeline/avg-bucket-aggregation.asciidoc deleted file mode 100644 index b2da59996d1cb..0000000000000 --- a/docs/reference/aggregations/pipeline/avg-bucket-aggregation.asciidoc +++ /dev/null @@ -1,139 +0,0 @@ -[[search-aggregations-pipeline-avg-bucket-aggregation]] -=== Average bucket aggregation -++++ -Average bucket -++++ - -A sibling pipeline aggregation which calculates the mean value of a specified -metric in a sibling aggregation. The specified metric must be numeric and the -sibling aggregation must be a multi-bucket aggregation. - -[[avg-bucket-agg-syntax]] -==== Syntax - -[source,js,indent=0] ----- -include::avg-bucket-aggregation.asciidoc[tag=avg-bucket-agg-syntax] ----- -// NOTCONSOLE - -[[avg-bucket-params]] -==== Parameters - -`buckets_path`:: -(Required, string) -Path to the buckets to average. For syntax, see <>. 
- -`gap_policy`:: -(Optional, string) -Policy to apply when gaps are found in the data. For valid values, see -<>. Defaults to `skip`. - -`format`:: -(Optional, string) -{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. If specified, the formatted value is returned in the aggregation's -`value_as_string` property. - -[[avg-bucket-agg-response]] -==== Response body - -`value`:: -(float) -Mean average value for the metric specified in `buckets_path`. - -`value_as_string`:: -(string) -Formatted output value for the aggregation. This property is only provided if -a `format` is specified in the request. - -[[avg-bucket-agg-ex]] -==== Example - -The following `avg_monthly_sales` aggregation uses `avg_bucket` to calculate -average sales per month: - -[source,console,subs="specialchars+"] ----- -POST _search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - } - } - }, - "avg_monthly_sales": { -// tag::avg-bucket-agg-syntax[] <1> - "avg_bucket": { - "buckets_path": "sales_per_month>sales", - "gap_policy": "skip", - "format": "#,##0.00;(#,##0.00)" - } -// end::avg-bucket-agg-syntax[] <2> - } - } -} ----- -// TEST[setup:sales] - -<1> Start of the `avg_bucket` configuration. Comment is not part of the example. -<2> End of the `avg_bucket` configuration. Comment is not part of the example. - -The request returns the following response: - -[source,console-result] ----- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550.0 - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60.0 - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "sales": { - "value": 375.0 - } - } - ] - }, - "avg_monthly_sales": { - "value": 328.33333333333333, - "value_as_string": "328.33" - } - } -} ----- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] diff --git a/docs/reference/aggregations/pipeline/bucket-correlation-aggregation.asciidoc b/docs/reference/aggregations/pipeline/bucket-correlation-aggregation.asciidoc deleted file mode 100644 index ae6ceb2f16c94..0000000000000 --- a/docs/reference/aggregations/pipeline/bucket-correlation-aggregation.asciidoc +++ /dev/null @@ -1,321 +0,0 @@ -[role="xpack"] -[[search-aggregations-bucket-correlation-aggregation]] -=== Bucket correlation aggregation -++++ -Bucket correlation -++++ - -A sibling pipeline aggregation which executes a correlation function on the -configured sibling multi-bucket aggregation. - - -[[bucket-correlation-agg-syntax]] -==== Parameters - -`buckets_path`:: -(Required, string) -Path to the buckets that contain one set of values to correlate. -For syntax, see <>. - -`function`:: -(Required, object) -The correlation function to execute. -+ -.Properties of `function` -[%collapsible%open] -==== -`count_correlation`::: -(Required^*^, object) -The configuration to calculate a count correlation. This function is designed for -determining the correlation of a term value and a given metric. Consequently, it -needs to meet the following requirements. 
-+ --- - -* The `buckets_path` must point to a `_count` metric. -* The total count of all the `bucket_path` count values must be less than or equal to `indicator.doc_count`. -* When utilizing this function, an initial calculation to gather the required `indicator` values is required. --- -+ -.Properties of `count_correlation` -[%collapsible%open] -===== -`indicator`::: -(Required, object) -The indicator with which to correlate the configured `bucket_path` values. -+ -.Properties of `indicator` -[%collapsible%open] -====== -`doc_count`::: -(Required, integer) -The total number of documents that initially created the `expectations`. It's required to be greater than or equal to the sum -of all values in the `buckets_path` as this is the originating superset of data to which the term values are correlated. - -`expectations`::: -(Required, array) -An array of numbers with which to correlate the configured `bucket_path` values. The length of this value must always equal -the number of buckets returned by the `bucket_path`. - -`fractions`::: -(Optional, array) -An array of fractions to use when averaging and calculating variance. This should be used if the pre-calculated data and the -`buckets_path` have known gaps. The length of `fractions`, if provided, must equal `expectations`. -====== -===== -==== - -==== Syntax - -A `bucket_correlation` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "bucket_correlation": { - "buckets_path": "range_values>_count", <1> - "function": { - "count_correlation": { <2> - "indicator": { - "expectations": [...], - "doc_count": 10000 - } - } - } - } -} --------------------------------------------------- -// NOTCONSOLE -<1> The buckets containing the values to correlate against. -<2> The correlation function definition. - - -[[bucket-correlation-agg-example]] -==== Example - -The following snippet correlates the individual terms in the field `version` with the `latency` metric. Not shown -is the pre-calculation of the `latency` indicator values, which was done utilizing the -<> aggregation. - -This example is only using the 10s percentiles. - -[source,console] -------------------------------------------------- -POST correlate_latency/_search?size=0&filter_path=aggregations -{ - "aggs": { - "buckets": { - "terms": { <1> - "field": "version", - "size": 2 - }, - "aggs": { - "latency_ranges": { - "range": { <2> - "field": "latency", - "ranges": [ - { "to": 0.0 }, - { "from": 0, "to": 105 }, - { "from": 105, "to": 225 }, - { "from": 225, "to": 445 }, - { "from": 445, "to": 665 }, - { "from": 665, "to": 885 }, - { "from": 885, "to": 1115 }, - { "from": 1115, "to": 1335 }, - { "from": 1335, "to": 1555 }, - { "from": 1555, "to": 1775 }, - { "from": 1775 } - ] - } - }, - "bucket_correlation": { <3> - "bucket_correlation": { - "buckets_path": "latency_ranges>_count", - "function": { - "count_correlation": { - "indicator": { - "expectations": [0, 52.5, 165, 335, 555, 775, 1000, 1225, 1445, 1665, 1775], - "doc_count": 200 - } - } - } - } - } - } - } - } -} -------------------------------------------------- -// TEST[setup:correlate_latency] - -<1> The term buckets containing a range aggregation and the bucket correlation aggregation. Both are utilized to calculate - the correlation of the term values with the latency. -<2> The range aggregation on the latency field. The ranges were created referencing the percentiles of the latency field. 
-<3> The bucket correlation aggregation that calculates the correlation of the number of term values within each range - and the previously calculated indicator values. - -And the following may be the response: - -[source,console-result] ----- -{ - "aggregations" : { - "buckets" : { - "doc_count_error_upper_bound" : 0, - "sum_other_doc_count" : 0, - "buckets" : [ - { - "key" : "1.0", - "doc_count" : 100, - "latency_ranges" : { - "buckets" : [ - { - "key" : "*-0.0", - "to" : 0.0, - "doc_count" : 0 - }, - { - "key" : "0.0-105.0", - "from" : 0.0, - "to" : 105.0, - "doc_count" : 1 - }, - { - "key" : "105.0-225.0", - "from" : 105.0, - "to" : 225.0, - "doc_count" : 9 - }, - { - "key" : "225.0-445.0", - "from" : 225.0, - "to" : 445.0, - "doc_count" : 0 - }, - { - "key" : "445.0-665.0", - "from" : 445.0, - "to" : 665.0, - "doc_count" : 0 - }, - { - "key" : "665.0-885.0", - "from" : 665.0, - "to" : 885.0, - "doc_count" : 0 - }, - { - "key" : "885.0-1115.0", - "from" : 885.0, - "to" : 1115.0, - "doc_count" : 10 - }, - { - "key" : "1115.0-1335.0", - "from" : 1115.0, - "to" : 1335.0, - "doc_count" : 20 - }, - { - "key" : "1335.0-1555.0", - "from" : 1335.0, - "to" : 1555.0, - "doc_count" : 20 - }, - { - "key" : "1555.0-1775.0", - "from" : 1555.0, - "to" : 1775.0, - "doc_count" : 20 - }, - { - "key" : "1775.0-*", - "from" : 1775.0, - "doc_count" : 20 - } - ] - }, - "bucket_correlation" : { - "value" : 0.8402398981360937 - } - }, - { - "key" : "2.0", - "doc_count" : 100, - "latency_ranges" : { - "buckets" : [ - { - "key" : "*-0.0", - "to" : 0.0, - "doc_count" : 0 - }, - { - "key" : "0.0-105.0", - "from" : 0.0, - "to" : 105.0, - "doc_count" : 19 - }, - { - "key" : "105.0-225.0", - "from" : 105.0, - "to" : 225.0, - "doc_count" : 11 - }, - { - "key" : "225.0-445.0", - "from" : 225.0, - "to" : 445.0, - "doc_count" : 20 - }, - { - "key" : "445.0-665.0", - "from" : 445.0, - "to" : 665.0, - "doc_count" : 20 - }, - { - "key" : "665.0-885.0", - "from" : 665.0, - "to" : 885.0, - "doc_count" : 20 - }, - { - "key" : "885.0-1115.0", - "from" : 885.0, - "to" : 1115.0, - "doc_count" : 10 - }, - { - "key" : "1115.0-1335.0", - "from" : 1115.0, - "to" : 1335.0, - "doc_count" : 0 - }, - { - "key" : "1335.0-1555.0", - "from" : 1335.0, - "to" : 1555.0, - "doc_count" : 0 - }, - { - "key" : "1555.0-1775.0", - "from" : 1555.0, - "to" : 1775.0, - "doc_count" : 0 - }, - { - "key" : "1775.0-*", - "from" : 1775.0, - "doc_count" : 0 - } - ] - }, - "bucket_correlation" : { - "value" : -0.5759855613334943 - } - } - ] - } - } -} ----- diff --git a/docs/reference/aggregations/pipeline/bucket-count-ks-test-aggregation.asciidoc b/docs/reference/aggregations/pipeline/bucket-count-ks-test-aggregation.asciidoc deleted file mode 100644 index 50185e1aec56c..0000000000000 --- a/docs/reference/aggregations/pipeline/bucket-count-ks-test-aggregation.asciidoc +++ /dev/null @@ -1,296 +0,0 @@ -[role="xpack"] -[[search-aggregations-bucket-count-ks-test-aggregation]] -=== Bucket count K-S test correlation aggregation -++++ -Bucket count K-S test -++++ - -A sibling pipeline aggregation which executes a two sample Kolmogorov–Smirnov test -(referred to as a "K-S test" from now on) against a provided distribution, and the -distribution implied by the documents counts in the configured sibling aggregation. 
-Specifically, for some metric, assuming that the percentile intervals of the metric are -known beforehand or have been computed by an aggregation, then one would use range -aggregation for the sibling to compute the p-value of the distribution difference between -the metric and the restriction of that metric to a subset of the documents. A natural use -case is if the sibling aggregation range aggregation nested in a terms aggregation, in -which case one compares the overall distribution of metric to its restriction to each term. - - -[[bucket-count-ks-test-agg-syntax]] -==== Parameters - -`buckets_path`:: -(Required, string) -Path to the buckets that contain one set of values to correlate. Must be a `_count` path -For syntax, see <>. - -`alternative`:: -(Optional, list) -A list of string values indicating which K-S test alternative to calculate. -The valid values are: "greater", "less", "two_sided". This parameter is key for -determining the K-S statistic used when calculating the K-S test. Default value is -all possible alternative hypotheses. - -`fractions`:: -(Optional, list) -A list of doubles indicating the distribution of the samples with which to compare to the -`buckets_path` results. In typical usage this is the overall proportion of documents in -each bucket, which is compared with the actual document proportions in each bucket -from the sibling aggregation counts. The default is to assume that overall documents -are uniformly distributed on these buckets, which they would be if one used equal -percentiles of a metric to define the bucket end points. - -`sampling_method`:: -(Optional, string) -Indicates the sampling methodology when calculating the K-S test. Note, this is sampling -of the returned values. This determines the cumulative distribution function (CDF) points -used comparing the two samples. Default is `upper_tail`, which emphasizes the upper -end of the CDF points. Valid options are: `upper_tail`, `uniform`, and `lower_tail`. - -==== Syntax - -A `bucket_count_ks_test` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "bucket_count_ks_test": { - "buckets_path": "range_values>_count", <1> - "alternative": ["less", "greater", "two_sided"], <2> - "sampling_method": "upper_tail" <3> - } -} --------------------------------------------------- -// NOTCONSOLE -<1> The buckets containing the values to test against. -<2> The alternatives to calculate. -<3> The sampling method for the K-S statistic. - - -[[bucket-count-ks-test-agg-example]] -==== Example - -The following snippet runs the `bucket_count_ks_test` on the individual terms in the field `version` against a uniform distribution. -The uniform distribution reflects the `latency` percentile buckets. Not shown is the pre-calculation of the `latency` indicator values, -which was done utilizing the -<> aggregation. - -This example is only using the deciles of `latency`. 
- -[source,console] -------------------------------------------------- -POST correlate_latency/_search?size=0&filter_path=aggregations -{ - "aggs": { - "buckets": { - "terms": { <1> - "field": "version", - "size": 2 - }, - "aggs": { - "latency_ranges": { - "range": { <2> - "field": "latency", - "ranges": [ - { "to": 0 }, - { "from": 0, "to": 105 }, - { "from": 105, "to": 225 }, - { "from": 225, "to": 445 }, - { "from": 445, "to": 665 }, - { "from": 665, "to": 885 }, - { "from": 885, "to": 1115 }, - { "from": 1115, "to": 1335 }, - { "from": 1335, "to": 1555 }, - { "from": 1555, "to": 1775 }, - { "from": 1775 } - ] - } - }, - "ks_test": { <3> - "bucket_count_ks_test": { - "buckets_path": "latency_ranges>_count", - "alternative": ["less", "greater", "two_sided"] - } - } - } - } - } -} -------------------------------------------------- -// TEST[setup:correlate_latency] - -<1> The term buckets containing a range aggregation and the bucket correlation aggregation. Both are utilized to calculate - the correlation of the term values with the latency. -<2> The range aggregation on the latency field. The ranges were created referencing the percentiles of the latency field. -<3> The bucket count K-S test aggregation that tests if the bucket counts comes from the same distribution as `fractions`; - where `fractions` is a uniform distribution. - -And the following may be the response: - -[source,console-result] ----- -{ - "aggregations" : { - "buckets" : { - "doc_count_error_upper_bound" : 0, - "sum_other_doc_count" : 0, - "buckets" : [ - { - "key" : "1.0", - "doc_count" : 100, - "latency_ranges" : { - "buckets" : [ - { - "key" : "*-0.0", - "to" : 0.0, - "doc_count" : 0 - }, - { - "key" : "0.0-105.0", - "from" : 0.0, - "to" : 105.0, - "doc_count" : 1 - }, - { - "key" : "105.0-225.0", - "from" : 105.0, - "to" : 225.0, - "doc_count" : 9 - }, - { - "key" : "225.0-445.0", - "from" : 225.0, - "to" : 445.0, - "doc_count" : 0 - }, - { - "key" : "445.0-665.0", - "from" : 445.0, - "to" : 665.0, - "doc_count" : 0 - }, - { - "key" : "665.0-885.0", - "from" : 665.0, - "to" : 885.0, - "doc_count" : 0 - }, - { - "key" : "885.0-1115.0", - "from" : 885.0, - "to" : 1115.0, - "doc_count" : 10 - }, - { - "key" : "1115.0-1335.0", - "from" : 1115.0, - "to" : 1335.0, - "doc_count" : 20 - }, - { - "key" : "1335.0-1555.0", - "from" : 1335.0, - "to" : 1555.0, - "doc_count" : 20 - }, - { - "key" : "1555.0-1775.0", - "from" : 1555.0, - "to" : 1775.0, - "doc_count" : 20 - }, - { - "key" : "1775.0-*", - "from" : 1775.0, - "doc_count" : 20 - } - ] - }, - "ks_test" : { - "less" : 2.248673241788478E-4, - "greater" : 1.0, - "two_sided" : 5.791639181800257E-4 - } - }, - { - "key" : "2.0", - "doc_count" : 100, - "latency_ranges" : { - "buckets" : [ - { - "key" : "*-0.0", - "to" : 0.0, - "doc_count" : 0 - }, - { - "key" : "0.0-105.0", - "from" : 0.0, - "to" : 105.0, - "doc_count" : 19 - }, - { - "key" : "105.0-225.0", - "from" : 105.0, - "to" : 225.0, - "doc_count" : 11 - }, - { - "key" : "225.0-445.0", - "from" : 225.0, - "to" : 445.0, - "doc_count" : 20 - }, - { - "key" : "445.0-665.0", - "from" : 445.0, - "to" : 665.0, - "doc_count" : 20 - }, - { - "key" : "665.0-885.0", - "from" : 665.0, - "to" : 885.0, - "doc_count" : 20 - }, - { - "key" : "885.0-1115.0", - "from" : 885.0, - "to" : 1115.0, - "doc_count" : 10 - }, - { - "key" : "1115.0-1335.0", - "from" : 1115.0, - "to" : 1335.0, - "doc_count" : 0 - }, - { - "key" : "1335.0-1555.0", - "from" : 1335.0, - "to" : 1555.0, - "doc_count" : 0 - }, - { - "key" : "1555.0-1775.0", - "from" : 
1555.0, - "to" : 1775.0, - "doc_count" : 0 - }, - { - "key" : "1775.0-*", - "from" : 1775.0, - "doc_count" : 0 - } - ] - }, - "ks_test" : { - "less" : 0.9642895789647244, - "greater" : 4.58718174664754E-9, - "two_sided" : 5.916656831139733E-9 - } - } - ] - } - } -} ----- diff --git a/docs/reference/aggregations/pipeline/bucket-script-aggregation.asciidoc b/docs/reference/aggregations/pipeline/bucket-script-aggregation.asciidoc deleted file mode 100644 index 1a6c647cb5b3d..0000000000000 --- a/docs/reference/aggregations/pipeline/bucket-script-aggregation.asciidoc +++ /dev/null @@ -1,166 +0,0 @@ -[[search-aggregations-pipeline-bucket-script-aggregation]] -=== Bucket script aggregation -++++ -Bucket script -++++ - -A parent pipeline aggregation which executes a script which can perform per bucket computations on specified metrics -in the parent multi-bucket aggregation. The specified metric must be numeric and the script must return a numeric value. - -[[bucket-script-agg-syntax]] -==== Syntax - -A `bucket_script` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "bucket_script": { - "buckets_path": { - "my_var1": "the_sum", <1> - "my_var2": "the_value_count" - }, - "script": "params.my_var1 / params.my_var2" - } -} --------------------------------------------------- -// NOTCONSOLE -<1> Here, `my_var1` is the name of the variable for this buckets path to use in the script, `the_sum` is the path to -the metrics to use for that variable. - -[[bucket-script-params]] -.`bucket_script` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`script` |The script to run for this aggregation. The script can be inline, file or indexed. (see <> -for more details) |Required | -|`buckets_path` |A map of script variables and their associated path to the buckets we wish to use for the variable -(see <> for more details) |Required | -|`gap_policy` |The policy to apply when gaps are found in the data (see <> for more - details)|Optional |`skip` -|`format` |{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. 
If specified, the formatted value is returned in the aggregation's -`value_as_string` property |Optional |`null` -|=== - -The following snippet calculates the ratio percentage of t-shirt sales compared to total sales each month: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "total_sales": { - "sum": { - "field": "price" - } - }, - "t-shirts": { - "filter": { - "term": { - "type": "t-shirt" - } - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - } - } - }, - "t-shirt-percentage": { - "bucket_script": { - "buckets_path": { - "tShirtSales": "t-shirts>sales", - "totalSales": "total_sales" - }, - "script": "params.tShirtSales / params.totalSales * 100" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "total_sales": { - "value": 550.0 - }, - "t-shirts": { - "doc_count": 1, - "sales": { - "value": 200.0 - } - }, - "t-shirt-percentage": { - "value": 36.36363636363637 - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "total_sales": { - "value": 60.0 - }, - "t-shirts": { - "doc_count": 1, - "sales": { - "value": 10.0 - } - }, - "t-shirt-percentage": { - "value": 16.666666666666664 - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "total_sales": { - "value": 375.0 - }, - "t-shirts": { - "doc_count": 1, - "sales": { - "value": 175.0 - } - }, - "t-shirt-percentage": { - "value": 46.666666666666664 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] diff --git a/docs/reference/aggregations/pipeline/bucket-selector-aggregation.asciidoc b/docs/reference/aggregations/pipeline/bucket-selector-aggregation.asciidoc deleted file mode 100644 index e7b6d7f777de5..0000000000000 --- a/docs/reference/aggregations/pipeline/bucket-selector-aggregation.asciidoc +++ /dev/null @@ -1,119 +0,0 @@ -[[search-aggregations-pipeline-bucket-selector-aggregation]] -=== Bucket selector aggregation -++++ -Bucket selector -++++ - -A parent pipeline aggregation which executes a script which determines whether the current bucket will be retained -in the parent multi-bucket aggregation. The specified metric must be numeric and the script must return a boolean value. -If the script language is `expression` then a numeric return value is permitted. In this case 0.0 will be evaluated as `false` -and all other values will evaluate to true. - -NOTE: The bucket_selector aggregation, like all pipeline aggregations, executes after all other sibling aggregations. This means that -using the bucket_selector aggregation to filter the returned buckets in the response does not save on execution time running the aggregations. 
- -==== Syntax - -A `bucket_selector` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "bucket_selector": { - "buckets_path": { - "my_var1": "the_sum", <1> - "my_var2": "the_value_count" - }, - "script": "params.my_var1 > params.my_var2" - } -} --------------------------------------------------- -// NOTCONSOLE -<1> Here, `my_var1` is the name of the variable for this buckets path to use in the script, `the_sum` is the path to -the metrics to use for that variable. - -[[bucket-selector-params]] -.`bucket_selector` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`script` |The script to run for this aggregation. The script can be inline, file or indexed. (see <> -for more details) |Required | -|`buckets_path` |A map of script variables and their associated path to the buckets we wish to use for the variable -(see <> for more details) |Required | -|`gap_policy` |The policy to apply when gaps are found in the data (see <> for more - details)|Optional |`skip` -|=== - -The following snippet only retains buckets where the total sales for the month is more than 200: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "total_sales": { - "sum": { - "field": "price" - } - }, - "sales_bucket_filter": { - "bucket_selector": { - "buckets_path": { - "totalSales": "total_sales" - }, - "script": "params.totalSales > 200" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "total_sales": { - "value": 550.0 - } - },<1> - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "total_sales": { - "value": 375.0 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -<1> Bucket for `2015/02/01 00:00:00` has been removed as its total sales was less than 200 diff --git a/docs/reference/aggregations/pipeline/bucket-sort-aggregation.asciidoc b/docs/reference/aggregations/pipeline/bucket-sort-aggregation.asciidoc deleted file mode 100644 index 917eb3df8c95a..0000000000000 --- a/docs/reference/aggregations/pipeline/bucket-sort-aggregation.asciidoc +++ /dev/null @@ -1,190 +0,0 @@ -[[search-aggregations-pipeline-bucket-sort-aggregation]] -=== Bucket sort aggregation -++++ -Bucket sort -++++ - -A parent pipeline aggregation which sorts the buckets of its parent multi-bucket aggregation. -Zero or more sort fields may be specified together with the corresponding sort order. -Each bucket may be sorted based on its `_key`, `_count` or its sub-aggregations. -In addition, parameters `from` and `size` may be set in order to truncate the result buckets. - -NOTE: The `bucket_sort` aggregation, like all pipeline aggregations, is executed after all other non-pipeline aggregations. 
-This means the sorting only applies to whatever buckets are already returned from the parent aggregation. For example, -if the parent aggregation is `terms` and its `size` is set to `10`, the `bucket_sort` will only sort over those 10 -returned term buckets. - -==== Syntax - -A `bucket_sort` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "bucket_sort": { - "sort": [ - { "sort_field_1": { "order": "asc" } }, <1> - { "sort_field_2": { "order": "desc" } }, - "sort_field_3" - ], - "from": 1, - "size": 3 - } -} --------------------------------------------------- -// NOTCONSOLE -<1> Here, `sort_field_1` is the bucket path to the variable to be used as the primary sort and its order -is ascending. - -[[bucket-sort-params]] -.`bucket_sort` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`sort` |The list of fields to sort on. See <> for more details. |Optional | -|`from` |Buckets in positions prior to the set value will be truncated. |Optional | `0` -|`size` |The number of buckets to return. Defaults to all buckets of the parent aggregation. |Optional | -|`gap_policy` |The policy to apply when gaps are found in the data (see <> for more - details)|Optional |`skip` -|=== - -The following snippet returns the buckets corresponding to the 3 months with the highest total sales in descending order: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "total_sales": { - "sum": { - "field": "price" - } - }, - "sales_bucket_sort": { - "bucket_sort": { - "sort": [ - { "total_sales": { "order": "desc" } } <1> - ], - "size": 3 <2> - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `sort` is set to use the values of `total_sales` in descending order -<2> `size` is set to `3` meaning only the top 3 months in `total_sales` will be returned - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 82, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "total_sales": { - "value": 550.0 - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "total_sales": { - "value": 375.0 - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "total_sales": { - "value": 60.0 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 82/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -==== Truncating without sorting - -It is also possible to use this aggregation in order to truncate the result buckets -without doing any sorting. To do so, just use the `from` and/or `size` parameters -without specifying `sort`. 
- -The following example simply truncates the result so that only the second bucket is returned: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "bucket_truncate": { - "bucket_sort": { - "from": 1, - "size": 1 - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2 - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] diff --git a/docs/reference/aggregations/pipeline/change-point-aggregation.asciidoc b/docs/reference/aggregations/pipeline/change-point-aggregation.asciidoc deleted file mode 100644 index 73763f1b4ec17..0000000000000 --- a/docs/reference/aggregations/pipeline/change-point-aggregation.asciidoc +++ /dev/null @@ -1,152 +0,0 @@ -[role="xpack"] -[[search-aggregations-change-point-aggregation]] -=== Change point aggregation -++++ -Change point -++++ - -experimental::[] - -A sibling pipeline that detects, spikes, dips, and change points in a metric. -Given a distribution of values provided by the sibling multi-bucket aggregation, -this aggregation indicates the bucket of any spike or dip and/or the bucket at -which the largest change in the distribution of values, if they are -statistically significant. - -TIP: It is recommended to use the change point aggregation to detect changes in -time-based data, however, you can use any metric to create buckets. - - - -[[change-point-agg-syntax]] -==== Parameters - -`buckets_path`:: -(Required, string) -Path to the buckets that contain one set of values in which to detect a change -point. There must be at least 22 bucketed values. Fewer than 1,000 is preferred. -For syntax, see <>. - -==== Syntax - -A `change_point` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "change_point": { - "buckets_path": "date_histogram>_count" <1> - } -} --------------------------------------------------- -// NOTCONSOLE -<1> The buckets containing the values to test against. - -[[change-point-agg-response]] -==== Response body - -`bucket`:: -(Optional, object) -Values of the bucket that indicates the discovered change point. Not returned if -no change point was found. All the aggregations in the bucket are returned as -well. -+ -.Properties of bucket -[%collapsible%open] -==== -`key`::: -(value) -The key of the bucket matched. Could be string or numeric. - -`doc_count`::: -(number) -The document count of the bucket. -==== - -`type`:: -(object) -The found change point type and its related values. 
Possible types: -+ --- -* `dip`: a significant dip occurs at this change point -* `distribution_change`: the overall distribution of the values has changed -significantly -* `non_stationary`: there is no change point, but the values are not from a -stationary distribution -* `spike`: a significant spike occurs at this point -* `stationary`: no change point found -* `step_change`: the change indicates a statistically significant step up or -down in value distribution -* `trend_change`: there is an overall trend change occurring at this point --- - -==== Example - - -The following example uses the Kibana sample data logs data set. - -[source,js] --------------------------------------------------- -GET kibana_sample_data_logs/_search -{ - "aggs": { - "date":{ <1> - "date_histogram": { - "field": "@timestamp", - "fixed_interval": "1d" - }, - "aggs": { - "avg": { <2> - "avg": { - "field": "bytes" - } - } - } - }, - "change_points_avg": { <3> - "change_point": { - "buckets_path": "date>avg" <4> - } - } - } -} --------------------------------------------------- -// NOTCONSOLE -<1> A date histogram aggregation that creates buckets with one day long -interval. -<2> A sibling aggregation of the `date` aggregation that calculates the average -value of the `bytes` field within every bucket. -<3> The change point detection aggregation configuration object. -<4> The path of the aggregation values to detect change points. In this case, -the input of the change point aggregation is the value of `avg` which is a -sibling aggregation of `date`. - - -The request returns a response that is similar to the following: - -[source,js] --------------------------------------------------- - "change_points_avg" : { - "bucket" : { - "key" : "2023-04-29T00:00:00.000Z", <1> - "doc_count" : 329, <2> - "avg" : { <3> - "value" : 4737.209726443769 - } - }, - "type" : { <4> - "dip" : { - "p_value" : 3.8999455212466465e-10, <5> - "change_point" : 41 <6> - } - } - } --------------------------------------------------- -// NOTCONSOLE -<1> The bucket key that is the change point. -<2> The number of documents in that bucket. -<3> Aggregated values in the bucket. -<4> Type of change found. -<5> The `p_value` indicates how extreme the change is; lower values indicate -greater change. -<6> The specific bucket where the change occurs (indexing starts at `0`). diff --git a/docs/reference/aggregations/pipeline/cumulative-cardinality-aggregation.asciidoc b/docs/reference/aggregations/pipeline/cumulative-cardinality-aggregation.asciidoc deleted file mode 100644 index bb2c9d1b8adef..0000000000000 --- a/docs/reference/aggregations/pipeline/cumulative-cardinality-aggregation.asciidoc +++ /dev/null @@ -1,237 +0,0 @@ -[role="xpack"] -[[search-aggregations-pipeline-cumulative-cardinality-aggregation]] -=== Cumulative cardinality aggregation -++++ -Cumulative cardinality -++++ - -A parent pipeline aggregation which calculates the Cumulative Cardinality in a parent histogram (or date_histogram) -aggregation. The specified metric must be a cardinality aggregation and the enclosing histogram -must have `min_doc_count` set to `0` (default for `histogram` aggregations). - -The `cumulative_cardinality` agg is useful for finding "total new items", like the number of new visitors to your -website each day. A regular cardinality aggregation will tell you how many unique visitors came each day, but doesn't -differentiate between "new" or "repeat" visitors. 
The Cumulative Cardinality aggregation can be used to determine -how many of each day's unique visitors are "new". - -==== Syntax - -A `cumulative_cardinality` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "cumulative_cardinality": { - "buckets_path": "my_cardinality_agg" - } -} --------------------------------------------------- -// NOTCONSOLE - -[[cumulative-cardinality-params]] -.`cumulative_cardinality` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |The path to the cardinality aggregation we wish to find the cumulative cardinality for (see <> for more - details) |Required | -|`format` |{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. If specified, the formatted value is returned in the aggregation's -`value_as_string` property |Optional |`null` -|=== - -The following snippet calculates the cumulative cardinality of the total daily `users`: - -[source,console] --------------------------------------------------- -GET /user_hits/_search -{ - "size": 0, - "aggs": { - "users_per_day": { - "date_histogram": { - "field": "timestamp", - "calendar_interval": "day" - }, - "aggs": { - "distinct_users": { - "cardinality": { - "field": "user_id" - } - }, - "total_new_users": { - "cumulative_cardinality": { - "buckets_path": "distinct_users" <1> - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:user_hits] - -<1> `buckets_path` instructs this aggregation to use the output of the `distinct_users` aggregation for the cumulative cardinality - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "users_per_day": { - "buckets": [ - { - "key_as_string": "2019-01-01T00:00:00.000Z", - "key": 1546300800000, - "doc_count": 2, - "distinct_users": { - "value": 2 - }, - "total_new_users": { - "value": 2 - } - }, - { - "key_as_string": "2019-01-02T00:00:00.000Z", - "key": 1546387200000, - "doc_count": 2, - "distinct_users": { - "value": 2 - }, - "total_new_users": { - "value": 3 - } - }, - { - "key_as_string": "2019-01-03T00:00:00.000Z", - "key": 1546473600000, - "doc_count": 3, - "distinct_users": { - "value": 3 - }, - "total_new_users": { - "value": 4 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - - -Note how the second day, `2019-01-02`, has two distinct users but the `total_new_users` metric generated by the -cumulative pipeline agg only increments to three. This means that only one of the two users that day were -new, the other had already been seen in the previous day. This happens again on the third day, where only -one of three users is completely new. - -==== Incremental cumulative cardinality - -The `cumulative_cardinality` agg will show you the total, distinct count since the beginning of the time period -being queried. Sometimes, however, it is useful to see the "incremental" count. Meaning, how many new users -are added each day, rather than the total cumulative count. 
- -This can be accomplished by adding a `derivative` aggregation to our query: - -[source,console] --------------------------------------------------- -GET /user_hits/_search -{ - "size": 0, - "aggs": { - "users_per_day": { - "date_histogram": { - "field": "timestamp", - "calendar_interval": "day" - }, - "aggs": { - "distinct_users": { - "cardinality": { - "field": "user_id" - } - }, - "total_new_users": { - "cumulative_cardinality": { - "buckets_path": "distinct_users" - } - }, - "incremental_new_users": { - "derivative": { - "buckets_path": "total_new_users" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:user_hits] - - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "users_per_day": { - "buckets": [ - { - "key_as_string": "2019-01-01T00:00:00.000Z", - "key": 1546300800000, - "doc_count": 2, - "distinct_users": { - "value": 2 - }, - "total_new_users": { - "value": 2 - } - }, - { - "key_as_string": "2019-01-02T00:00:00.000Z", - "key": 1546387200000, - "doc_count": 2, - "distinct_users": { - "value": 2 - }, - "total_new_users": { - "value": 3 - }, - "incremental_new_users": { - "value": 1.0 - } - }, - { - "key_as_string": "2019-01-03T00:00:00.000Z", - "key": 1546473600000, - "doc_count": 3, - "distinct_users": { - "value": 3 - }, - "total_new_users": { - "value": 4 - }, - "incremental_new_users": { - "value": 1.0 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] diff --git a/docs/reference/aggregations/pipeline/cumulative-sum-aggregation.asciidoc b/docs/reference/aggregations/pipeline/cumulative-sum-aggregation.asciidoc deleted file mode 100644 index 12136a560a7a7..0000000000000 --- a/docs/reference/aggregations/pipeline/cumulative-sum-aggregation.asciidoc +++ /dev/null @@ -1,122 +0,0 @@ -[[search-aggregations-pipeline-cumulative-sum-aggregation]] -=== Cumulative sum aggregation -++++ -Cumulative sum -++++ - -A parent pipeline aggregation which calculates the cumulative sum of a specified metric in a parent histogram (or date_histogram) -aggregation. The specified metric must be numeric and the enclosing histogram must have `min_doc_count` set to `0` (default -for `histogram` aggregations). - -==== Syntax - -A `cumulative_sum` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "cumulative_sum": { - "buckets_path": "the_sum" - } -} --------------------------------------------------- -// NOTCONSOLE - -[[cumulative-sum-params]] -.`cumulative_sum` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |The path to the buckets we wish to find the cumulative sum for (see <> for more - details) |Required | -|`format` |{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. 
If specified, the formatted value is returned in the aggregation's -`value_as_string` property |Optional |`null` -|=== - -The following snippet calculates the cumulative sum of the total monthly `sales`: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - }, - "cumulative_sales": { - "cumulative_sum": { - "buckets_path": "sales" <1> - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `buckets_path` instructs this cumulative sum aggregation to use the output of the `sales` aggregation for the cumulative sum - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550.0 - }, - "cumulative_sales": { - "value": 550.0 - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60.0 - }, - "cumulative_sales": { - "value": 610.0 - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "sales": { - "value": 375.0 - }, - "cumulative_sales": { - "value": 985.0 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] diff --git a/docs/reference/aggregations/pipeline/derivative-aggregation.asciidoc b/docs/reference/aggregations/pipeline/derivative-aggregation.asciidoc deleted file mode 100644 index cfa44773c2733..0000000000000 --- a/docs/reference/aggregations/pipeline/derivative-aggregation.asciidoc +++ /dev/null @@ -1,319 +0,0 @@ -[[search-aggregations-pipeline-derivative-aggregation]] -=== Derivative aggregation -++++ -Derivative -++++ - -A parent pipeline aggregation which calculates the derivative of a specified metric in a parent histogram (or date_histogram) -aggregation. The specified metric must be numeric and the enclosing histogram must have `min_doc_count` set to `0` (default -for `histogram` aggregations). - -==== Syntax - -A `derivative` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -"derivative": { - "buckets_path": "the_sum" -} --------------------------------------------------- -// NOTCONSOLE - -[[derivative-params]] -.`derivative` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |The path to the buckets we wish to find the derivative for (see <> for more - details) |Required | - |`gap_policy` |The policy to apply when gaps are found in the data (see <> for more - details)|Optional |`skip` - |`format` |{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. 
If specified, the formatted value is returned in the aggregation's -`value_as_string` property |Optional | `null` -|=== - - -==== First Order Derivative - -The following snippet calculates the derivative of the total monthly `sales`: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - }, - "sales_deriv": { - "derivative": { - "buckets_path": "sales" <1> - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `buckets_path` instructs this derivative aggregation to use the output of the `sales` aggregation for the derivative - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550.0 - } <1> - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60.0 - }, - "sales_deriv": { - "value": -490.0 <2> - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, <3> - "sales": { - "value": 375.0 - }, - "sales_deriv": { - "value": 315.0 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -<1> No derivative for the first bucket since we need at least 2 data points to calculate the derivative -<2> Derivative value units are implicitly defined by the `sales` aggregation and the parent histogram so in this case the units -would be $/month assuming the `price` field has units of $. 
-<3> The number of documents in the bucket are represented by the `doc_count` - -==== Second Order Derivative - -A second order derivative can be calculated by chaining the derivative pipeline aggregation onto the result of another derivative -pipeline aggregation as in the following example which will calculate both the first and the second order derivative of the total -monthly sales: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - }, - "sales_deriv": { - "derivative": { - "buckets_path": "sales" - } - }, - "sales_2nd_deriv": { - "derivative": { - "buckets_path": "sales_deriv" <1> - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `buckets_path` for the second derivative points to the name of the first derivative - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 50, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550.0 - } <1> - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60.0 - }, - "sales_deriv": { - "value": -490.0 - } <1> - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "sales": { - "value": 375.0 - }, - "sales_deriv": { - "value": 315.0 - }, - "sales_2nd_deriv": { - "value": 805.0 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 50/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -<1> No second derivative for the first two buckets since we need at least 2 data points from the first derivative to calculate the -second derivative - -==== Units - -The derivative aggregation allows the units of the derivative values to be specified. This returns an extra field in the response -`normalized_value` which reports the derivative value in the desired x-axis units. 
In the below example we calculate the derivative -of the total sales per month but ask for the derivative of the sales as in the units of sales per day: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - }, - "sales_deriv": { - "derivative": { - "buckets_path": "sales", - "unit": "day" <1> - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] -<1> `unit` specifies what unit to use for the x-axis of the derivative calculation - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 50, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550.0 - } <1> - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60.0 - }, - "sales_deriv": { - "value": -490.0, <1> - "normalized_value": -15.806451612903226 <2> - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "sales": { - "value": 375.0 - }, - "sales_deriv": { - "value": 315.0, - "normalized_value": 11.25 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 50/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -<1> `value` is reported in the original units of 'per month' -<2> `normalized_value` is reported in the desired units of 'per day' diff --git a/docs/reference/aggregations/pipeline/extended-stats-bucket-aggregation.asciidoc b/docs/reference/aggregations/pipeline/extended-stats-bucket-aggregation.asciidoc deleted file mode 100644 index b3012d0207ef4..0000000000000 --- a/docs/reference/aggregations/pipeline/extended-stats-bucket-aggregation.asciidoc +++ /dev/null @@ -1,140 +0,0 @@ -[[search-aggregations-pipeline-extended-stats-bucket-aggregation]] -=== Extended stats bucket aggregation -++++ -Extended stats bucket -++++ - -A sibling pipeline aggregation which calculates a variety of stats across all bucket of a specified metric in a sibling aggregation. -The specified metric must be numeric and the sibling aggregation must be a multi-bucket aggregation. - -This aggregation provides a few more statistics (sum of squares, standard deviation, etc) compared to the `stats_bucket` aggregation. - -==== Syntax - -A `extended_stats_bucket` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "extended_stats_bucket": { - "buckets_path": "the_sum" - } -} --------------------------------------------------- -// NOTCONSOLE - -[[extended-stats-bucket-params]] -.`extended_stats_bucket` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |The path to the buckets we wish to calculate stats for (see <> for more - details) |Required | -|`gap_policy` |The policy to apply when gaps are found in the data (see <> for more - details)|Optional | `skip` -|`format` |{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. 
If specified, the formatted value is returned in the aggregation's -`value_as_string` property |Optional | `null` -|`sigma` |The number of standard deviations above/below the mean to display |Optional | 2 -|=== - -The following snippet calculates the extended stats for monthly `sales` bucket: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - } - } - }, - "stats_monthly_sales": { - "extended_stats_bucket": { - "buckets_path": "sales_per_month>sales" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `bucket_paths` instructs this `extended_stats_bucket` aggregation that we want the calculate stats for the `sales` aggregation in the -`sales_per_month` date histogram. - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550.0 - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60.0 - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "sales": { - "value": 375.0 - } - } - ] - }, - "stats_monthly_sales": { - "count": 3, - "min": 60.0, - "max": 550.0, - "avg": 328.3333333333333, - "sum": 985.0, - "sum_of_squares": 446725.0, - "variance": 41105.55555555556, - "variance_population": 41105.55555555556, - "variance_sampling": 61658.33333333334, - "std_deviation": 202.74505063146563, - "std_deviation_population": 202.74505063146563, - "std_deviation_sampling": 248.3109609609156, - "std_deviation_bounds": { - "upper": 733.8234345962646, - "lower": -77.15676792959795, - "upper_population" : 733.8234345962646, - "lower_population" : -77.15676792959795, - "upper_sampling" : 824.9552552551645, - "lower_sampling" : -168.28858858849787 - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] diff --git a/docs/reference/aggregations/pipeline/inference-bucket-aggregation.asciidoc b/docs/reference/aggregations/pipeline/inference-bucket-aggregation.asciidoc deleted file mode 100644 index 064881925e8d8..0000000000000 --- a/docs/reference/aggregations/pipeline/inference-bucket-aggregation.asciidoc +++ /dev/null @@ -1,184 +0,0 @@ -[role="xpack"] -[[search-aggregations-pipeline-inference-bucket-aggregation]] -=== {infer-cap} bucket aggregation -++++ -{infer-cap} bucket -++++ - - -A parent pipeline aggregation which loads a pre-trained model and performs -{infer} on the collated result fields from the parent bucket aggregation. - -To use the {infer} bucket aggregation, you need to have the same security -privileges that are required for using the -<>. 
- -[[inference-bucket-agg-syntax]] -==== Syntax - -A `inference` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "inference": { - "model_id": "a_model_for_inference", <1> - "inference_config": { <2> - "regression_config": { - "num_top_feature_importance_values": 2 - } - }, - "buckets_path": { - "avg_cost": "avg_agg", <3> - "max_cost": "max_agg" - } - } -} --------------------------------------------------- -// NOTCONSOLE -<1> The unique identifier or alias for the trained model. -<2> The optional inference config which overrides the model's default settings -<3> Map the value of `avg_agg` to the model's input field `avg_cost` - - -[[inference-bucket-params]] -.`inference` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -| `model_id` | The ID or alias for the trained model. | Required | - -| `inference_config` | Contains the inference type and its options. There are two types: <> and <> | Optional | - -| `buckets_path` | Defines the paths to the input aggregations and maps the aggregation names to the field names expected by the model. -See <> for more details | Required | - -|=== - - -==== Configuration options for {infer} models - -The `inference_config` setting is optional and usually isn't required as the -pre-trained models come equipped with sensible defaults. In the context of -aggregations some options can be overridden for each of the two types of model. - -[discrete] -[[inference-agg-regression-opt]] -===== Configuration options for {regression} models - -`num_top_feature_importance_values`:: -(Optional, integer) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-regression-num-top-feature-importance-values] - -[discrete] -[[inference-agg-classification-opt]] -===== Configuration options for {classification} models - -`num_top_classes`:: -(Optional, integer) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-classification-num-top-classes] - -`num_top_feature_importance_values`:: -(Optional, integer) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-classification-num-top-feature-importance-values] - -`prediction_field_type`:: -(Optional, string) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-classification-prediction-field-type] - - -[[inference-bucket-agg-example]] -==== Example - -The following snippet aggregates a web log by `client_ip` and extracts a number -of features via metric and bucket sub-aggregations as input to the {infer} -aggregation configured with a model trained to identify suspicious client IPs: - -[source,console] -------------------------------------------------- -GET kibana_sample_data_logs/_search -{ - "size": 0, - "aggs": { - "client_ip": { <1> - "composite": { - "sources": [ - { - "client_ip": { - "terms": { - "field": "clientip" - } - } - } - ] - }, - "aggs": { <2> - "url_dc": { - "cardinality": { - "field": "url.keyword" - } - }, - "bytes_sum": { - "sum": { - "field": "bytes" - } - }, - "geo_src_dc": { - "cardinality": { - "field": "geo.src" - } - }, - "geo_dest_dc": { - "cardinality": { - "field": "geo.dest" - } - }, - "responses_total": { - "value_count": { - "field": "timestamp" - } - }, - "success": { - "filter": { - "term": { - "response": "200" - } - } - }, - "error404": { - "filter": { - "term": { - "response": "404" - } - } - }, - "error503": { - "filter": { - "term": { - "response": "503" - } - } - }, - "malicious_client_ip": { <3> - "inference": { - "model_id": 
"malicious_clients_model", - "buckets_path": { - "response_count": "responses_total", - "url_dc": "url_dc", - "bytes_sum": "bytes_sum", - "geo_src_dc": "geo_src_dc", - "geo_dest_dc": "geo_dest_dc", - "success": "success._count", - "error404": "error404._count", - "error503": "error503._count" - } - } - } - } - } - } -} -------------------------------------------------- -// TEST[skip:setup kibana sample data] - -<1> A composite bucket aggregation that aggregates the data by `client_ip`. -<2> A series of metrics and bucket sub-aggregations. -<3> {infer-cap} bucket aggregation that specifies the trained model and maps the -aggregation names to the model's input fields. diff --git a/docs/reference/aggregations/pipeline/max-bucket-aggregation.asciidoc b/docs/reference/aggregations/pipeline/max-bucket-aggregation.asciidoc deleted file mode 100644 index 6ee418ff9f778..0000000000000 --- a/docs/reference/aggregations/pipeline/max-bucket-aggregation.asciidoc +++ /dev/null @@ -1,122 +0,0 @@ -[[search-aggregations-pipeline-max-bucket-aggregation]] -=== Max bucket aggregation -++++ -Max bucket -++++ - -A sibling pipeline aggregation which identifies the bucket(s) with the maximum value of a specified metric in a sibling aggregation -and outputs both the value and the key(s) of the bucket(s). The specified metric must be numeric and the sibling aggregation must -be a multi-bucket aggregation. - -==== Syntax - -A `max_bucket` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "max_bucket": { - "buckets_path": "the_sum" - } -} --------------------------------------------------- -// NOTCONSOLE - -[[max-bucket-params]] -.`max_bucket` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |The path to the buckets we wish to find the maximum for (see <> for more - details) |Required | -|`gap_policy` |The policy to apply when gaps are found in the data (see <> for more - details)|Optional | `skip` - |`format` |{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. If specified, the formatted value is returned in the aggregation's -`value_as_string` property |Optional |`null` -|=== - -The following snippet calculates the maximum of the total monthly `sales`: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - } - } - }, - "max_monthly_sales": { - "max_bucket": { - "buckets_path": "sales_per_month>sales" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `buckets_path` instructs this max_bucket aggregation that we want the maximum value of the `sales` aggregation in the -`sales_per_month` date histogram. 
- -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550.0 - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60.0 - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "sales": { - "value": 375.0 - } - } - ] - }, - "max_monthly_sales": { - "keys": ["2015/01/01 00:00:00"], <1> - "value": 550.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -<1> `keys` is an array of strings since the maximum value may be present in multiple buckets diff --git a/docs/reference/aggregations/pipeline/min-bucket-aggregation.asciidoc b/docs/reference/aggregations/pipeline/min-bucket-aggregation.asciidoc deleted file mode 100644 index a0e6d61685c37..0000000000000 --- a/docs/reference/aggregations/pipeline/min-bucket-aggregation.asciidoc +++ /dev/null @@ -1,122 +0,0 @@ -[[search-aggregations-pipeline-min-bucket-aggregation]] -=== Min bucket aggregation -++++ -Min bucket -++++ - -A sibling pipeline aggregation which identifies the bucket(s) with the minimum value of a specified metric in a sibling aggregation -and outputs both the value and the key(s) of the bucket(s). The specified metric must be numeric and the sibling aggregation must -be a multi-bucket aggregation. - -==== Syntax - -A `min_bucket` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "min_bucket": { - "buckets_path": "the_sum" - } -} --------------------------------------------------- -// NOTCONSOLE - -[[min-bucket-params]] -.`min_bucket` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |The path to the buckets we wish to find the minimum for (see <> for more - details) |Required | - |`gap_policy` |The policy to apply when gaps are found in the data (see <> for more - details)|Optional | `skip` - |`format` |{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. If specified, the formatted value is returned in the aggregation's -`value_as_string` property |Optional |`null` -|=== - -The following snippet calculates the minimum of the total monthly `sales`: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - } - } - }, - "min_monthly_sales": { - "min_bucket": { - "buckets_path": "sales_per_month>sales" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `buckets_path` instructs this min_bucket aggregation that we want the minimum value of the `sales` aggregation in the -`sales_per_month` date histogram. 
- -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550.0 - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60.0 - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "sales": { - "value": 375.0 - } - } - ] - }, - "min_monthly_sales": { - "keys": ["2015/02/01 00:00:00"], <1> - "value": 60.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -<1> `keys` is an array of strings since the minimum value may be present in multiple buckets diff --git a/docs/reference/aggregations/pipeline/movfn-aggregation.asciidoc b/docs/reference/aggregations/pipeline/movfn-aggregation.asciidoc deleted file mode 100644 index 44a00b9f5b99e..0000000000000 --- a/docs/reference/aggregations/pipeline/movfn-aggregation.asciidoc +++ /dev/null @@ -1,657 +0,0 @@ -[[search-aggregations-pipeline-movfn-aggregation]] -=== Moving function aggregation -++++ -Moving function -++++ - -Given an ordered series of data, the Moving Function aggregation will slide a window across the data and allow the user to specify a custom -script that is executed on each window of data. For convenience, a number of common functions are predefined such as min/max, moving averages, -etc. - -==== Syntax - -A `moving_fn` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "moving_fn": { - "buckets_path": "the_sum", - "window": 10, - "script": "MovingFunctions.min(values)" - } -} --------------------------------------------------- -// NOTCONSOLE - -[[moving-fn-params]] -.`moving_fn` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |Path to the metric of interest (see <> for more details |Required | -|`window` |The size of window to "slide" across the histogram. |Required | -|`script` |The script that should be executed on each window of data |Required | -|`gap_policy` |The policy to apply when gaps are found in the data. See <>. |Optional |`skip` -|`shift` |<> of window position. |Optional | 0 -|=== - -`moving_fn` aggregations must be embedded inside of a `histogram` or `date_histogram` aggregation. They can be -embedded like any other metric aggregation: - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { <1> - "date_histogram": { - "field": "date", - "calendar_interval": "1M" - }, - "aggs": { - "the_sum": { - "sum": { "field": "price" } <2> - }, - "the_movfn": { - "moving_fn": { - "buckets_path": "the_sum", <3> - "window": 10, - "script": "MovingFunctions.unweightedAvg(values)" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> A `date_histogram` named "my_date_histo" is constructed on the "timestamp" field, with one-month intervals -<2> A `sum` metric is used to calculate the sum of a field. 
This could be any numeric metric (sum, min, max, etc) -<3> Finally, we specify a `moving_fn` aggregation which uses "the_sum" metric as its input. - -Moving averages are built by first specifying a `histogram` or `date_histogram` over a field. You can then optionally -add numeric metrics, such as a `sum`, inside of that histogram. Finally, the `moving_fn` is embedded inside the histogram. -The `buckets_path` parameter is then used to "point" at one of the sibling metrics inside of the histogram (see -<> for a description of the syntax for `buckets_path`. - -An example response from the above aggregation may look like: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "my_date_histo": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "the_sum": { - "value": 550.0 - }, - "the_movfn": { - "value": null - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "the_sum": { - "value": 60.0 - }, - "the_movfn": { - "value": 550.0 - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "the_sum": { - "value": 375.0 - }, - "the_movfn": { - "value": 305.0 - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - - -==== Custom user scripting - -The Moving Function aggregation allows the user to specify any arbitrary script to define custom logic. The script is invoked each time a -new window of data is collected. These values are provided to the script in the `values` variable. The script should then perform some -kind of calculation and emit a single `double` as the result. Emitting `null` is not permitted, although `NaN` and +/- `Inf` are allowed. - -For example, this script will simply return the first value from the window, or `NaN` if no values are available: - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { - "date_histogram": { - "field": "date", - "calendar_interval": "1M" - }, - "aggs": { - "the_sum": { - "sum": { "field": "price" } - }, - "the_movavg": { - "moving_fn": { - "buckets_path": "the_sum", - "window": 10, - "script": "return values.length > 0 ? values[0] : Double.NaN" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -[[shift-parameter]] -==== shift parameter - -By default (with `shift = 0`), the window that is offered for calculation is the last `n` values excluding the current bucket. -Increasing `shift` by 1 moves starting window position by `1` to the right. - -- To include current bucket to the window, use `shift = 1`. -- For center alignment (`n / 2` values before and after the current bucket), use `shift = window / 2`. -- For right alignment (`n` values after the current bucket), use `shift = window`. - -If either of window edges moves outside the borders of data series, the window shrinks to include available values only. 
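For illustration only, a minimal sketch of the `shift` behavior described above (reusing the same `sales` example data and `my_date_histo`/`the_sum` names assumed by the surrounding snippets; the aggregation name `the_shifted_avg` is hypothetical), where `shift: 1` makes the window include the current bucket:

[source,js]
--------------------------------------------------
POST /_search
{
  "size": 0,
  "aggs": {
    "my_date_histo": {
      "date_histogram": {
        "field": "date",
        "calendar_interval": "1M"
      },
      "aggs": {
        "the_sum": {
          "sum": { "field": "price" }
        },
        "the_shifted_avg": {
          "moving_fn": {
            "buckets_path": "the_sum",
            "window": 10,
            "shift": 1,                                          <1>
            "script": "MovingFunctions.unweightedAvg(values)"
          }
        }
      }
    }
  }
}
--------------------------------------------------
// NOTCONSOLE
<1> With `shift` set to `1`, the averaged window ends at the current bucket instead of the previous one; larger values move the window further to the right, as described above.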
- -==== Pre-built Functions - -For convenience, a number of functions have been prebuilt and are available inside the `moving_fn` script context: - -- `max()` -- `min()` -- `sum()` -- `stdDev()` -- `unweightedAvg()` -- `linearWeightedAvg()` -- `ewma()` -- `holt()` -- `holtWinters()` - -The functions are available from the `MovingFunctions` namespace. E.g. `MovingFunctions.max()` - -===== max Function - -This function accepts a collection of doubles and returns the maximum value in that window. `null` and `NaN` values are ignored; the maximum -is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `NaN` is returned as the result. - -[[max-params]] -.`max(double[] values)` Parameters -[options="header"] -|=== -|Parameter Name |Description -|`values` |The window of values to find the maximum -|=== - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { - "date_histogram": { - "field": "date", - "calendar_interval": "1M" - }, - "aggs": { - "the_sum": { - "sum": { "field": "price" } - }, - "the_moving_max": { - "moving_fn": { - "buckets_path": "the_sum", - "window": 10, - "script": "MovingFunctions.max(values)" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -===== min Function - -This function accepts a collection of doubles and returns the minimum value in that window. `null` and `NaN` values are ignored; the minimum -is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `NaN` is returned as the result. - -[[min-params]] -.`min(double[] values)` Parameters -[options="header"] -|=== -|Parameter Name |Description -|`values` |The window of values to find the minimum -|=== - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { - "date_histogram": { - "field": "date", - "calendar_interval": "1M" - }, - "aggs": { - "the_sum": { - "sum": { "field": "price" } - }, - "the_moving_min": { - "moving_fn": { - "buckets_path": "the_sum", - "window": 10, - "script": "MovingFunctions.min(values)" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -===== sum Function - -This function accepts a collection of doubles and returns the sum of the values in that window. `null` and `NaN` values are ignored; -the sum is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `0.0` is returned as the result. - -[[sum-params]] -.`sum(double[] values)` Parameters -[options="header"] -|=== -|Parameter Name |Description -|`values` |The window of values to find the sum of -|=== - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { - "date_histogram": { - "field": "date", - "calendar_interval": "1M" - }, - "aggs": { - "the_sum": { - "sum": { "field": "price" } - }, - "the_moving_sum": { - "moving_fn": { - "buckets_path": "the_sum", - "window": 10, - "script": "MovingFunctions.sum(values)" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -===== stdDev Function - -This function accepts a collection of doubles and average, then returns the standard deviation of the values in that window. -`null` and `NaN` values are ignored; the sum is only calculated over the real values. 
If the window is empty, or all values are -`null`/`NaN`, `0.0` is returned as the result. - -[[stddev-params]] -.`stdDev(double[] values)` Parameters -[options="header"] -|=== -|Parameter Name |Description -|`values` |The window of values to find the standard deviation of -|`avg` |The average of the window -|=== - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { - "date_histogram": { - "field": "date", - "calendar_interval": "1M" - }, - "aggs": { - "the_sum": { - "sum": { "field": "price" } - }, - "the_moving_sum": { - "moving_fn": { - "buckets_path": "the_sum", - "window": 10, - "script": "MovingFunctions.stdDev(values, MovingFunctions.unweightedAvg(values))" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -The `avg` parameter must be provided to the standard deviation function because different styles of averages can be computed on the window -(simple, linearly weighted, etc). The various moving averages that are detailed below can be used to calculate the average for the -standard deviation function. - -===== unweightedAvg Function - -The `unweightedAvg` function calculates the sum of all values in the window, then divides by the size of the window. It is effectively -a simple arithmetic mean of the window. The simple moving average does not perform any time-dependent weighting, which means -the values from a `simple` moving average tend to "lag" behind the real data. - -`null` and `NaN` values are ignored; the average is only calculated over the real values. If the window is empty, or all values are -`null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` -values. - -[[unweightedavg-params]] -.`unweightedAvg(double[] values)` Parameters -[options="header"] -|=== -|Parameter Name |Description -|`values` |The window of values to find the sum of -|=== - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { - "date_histogram": { - "field": "date", - "calendar_interval": "1M" - }, - "aggs": { - "the_sum": { - "sum": { "field": "price" } - }, - "the_movavg": { - "moving_fn": { - "buckets_path": "the_sum", - "window": 10, - "script": "MovingFunctions.unweightedAvg(values)" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -==== linearWeightedAvg Function - -The `linearWeightedAvg` function assigns a linear weighting to points in the series, such that "older" datapoints (e.g. those at -the beginning of the window) contribute a linearly less amount to the total average. The linear weighting helps reduce -the "lag" behind the data's mean, since older points have less influence. - -If the window is empty, or all values are `null`/`NaN`, `NaN` is returned as the result. 
- -[[linearweightedavg-params]] -.`linearWeightedAvg(double[] values)` Parameters -[options="header"] -|=== -|Parameter Name |Description -|`values` |The window of values to find the sum of -|=== - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { - "date_histogram": { - "field": "date", - "calendar_interval": "1M" - }, - "aggs": { - "the_sum": { - "sum": { "field": "price" } - }, - "the_movavg": { - "moving_fn": { - "buckets_path": "the_sum", - "window": 10, - "script": "MovingFunctions.linearWeightedAvg(values)" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -==== ewma Function - -The `ewma` function (aka "single-exponential") is similar to the `linearMovAvg` function, -except older data-points become exponentially less important, -rather than linearly less important. The speed at which the importance decays can be controlled with an `alpha` -setting. Small values make the weight decay slowly, which provides greater smoothing and takes into account a larger -portion of the window. Larger values make the weight decay quickly, which reduces the impact of older values on the -moving average. This tends to make the moving average track the data more closely but with less smoothing. - -`null` and `NaN` values are ignored; the average is only calculated over the real values. If the window is empty, or all values are -`null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` -values. - -[[ewma-params]] -.`ewma(double[] values, double alpha)` Parameters -[options="header"] -|=== -|Parameter Name |Description -|`values` |The window of values to find the sum of -|`alpha` |Exponential decay -|=== - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { - "date_histogram": { - "field": "date", - "calendar_interval": "1M" - }, - "aggs": { - "the_sum": { - "sum": { "field": "price" } - }, - "the_movavg": { - "moving_fn": { - "buckets_path": "the_sum", - "window": 10, - "script": "MovingFunctions.ewma(values, 0.3)" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - - -==== holt Function - -The `holt` function (aka "double exponential") incorporates a second exponential term which -tracks the data's trend. Single exponential does not perform well when the data has an underlying linear trend. The -double exponential model calculates two values internally: a "level" and a "trend". - -The level calculation is similar to `ewma`, and is an exponentially weighted view of the data. The difference is -that the previously smoothed value is used instead of the raw value, which allows it to stay close to the original series. -The trend calculation looks at the difference between the current and last value (e.g. the slope, or trend, of the -smoothed data). The trend value is also exponentially weighted. - -Values are produced by multiplying the level and trend components. - -`null` and `NaN` values are ignored; the average is only calculated over the real values. If the window is empty, or all values are -`null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` -values. 
- -[[holt-params]] -.`holt(double[] values, double alpha)` Parameters -[options="header"] -|=== -|Parameter Name |Description -|`values` |The window of values to find the sum of -|`alpha` |Level decay value -|`beta` |Trend decay value -|=== - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { - "date_histogram": { - "field": "date", - "calendar_interval": "1M" - }, - "aggs": { - "the_sum": { - "sum": { "field": "price" } - }, - "the_movavg": { - "moving_fn": { - "buckets_path": "the_sum", - "window": 10, - "script": "MovingFunctions.holt(values, 0.3, 0.1)" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -In practice, the `alpha` value behaves very similarly in `holtMovAvg` as `ewmaMovAvg`: small values produce more smoothing -and more lag, while larger values produce closer tracking and less lag. The value of `beta` is often difficult -to see. Small values emphasize long-term trends (such as a constant linear trend in the whole series), while larger -values emphasize short-term trends. - -==== holtWinters Function - -The `holtWinters` function (aka "triple exponential") incorporates a third exponential term which -tracks the seasonal aspect of your data. This aggregation therefore smooths based on three components: "level", "trend" -and "seasonality". - -The level and trend calculation is identical to `holt` The seasonal calculation looks at the difference between -the current point, and the point one period earlier. - -Holt-Winters requires a little more handholding than the other moving averages. You need to specify the "periodicity" -of your data: e.g. if your data has cyclic trends every 7 days, you would set `period = 7`. Similarly if there was -a monthly trend, you would set it to `30`. There is currently no periodicity detection, although that is planned -for future enhancements. - -`null` and `NaN` values are ignored; the average is only calculated over the real values. If the window is empty, or all values are -`null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` -values. - -[[holtwinters-params]] -.`holtWinters(double[] values, double alpha)` Parameters -[options="header"] -|=== -|Parameter Name |Description -|`values` |The window of values to find the sum of -|`alpha` |Level decay value -|`beta` |Trend decay value -|`gamma` |Seasonality decay value -|`period` |The periodicity of the data -|`multiplicative` |True if you wish to use multiplicative holt-winters, false to use additive -|=== - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { - "date_histogram": { - "field": "date", - "calendar_interval": "1M" - }, - "aggs": { - "the_sum": { - "sum": { "field": "price" } - }, - "the_movavg": { - "moving_fn": { - "buckets_path": "the_sum", - "window": 10, - "script": "if (values.length > 5*2) {MovingFunctions.holtWinters(values, 0.3, 0.1, 0.1, 5, false)}" - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -[WARNING] -====== -Multiplicative Holt-Winters works by dividing each data point by the seasonal value. This is problematic if any of -your data is zero, or if there are gaps in the data (since this results in a divid-by-zero). 
To combat this, the -`mult` Holt-Winters pads all values by a very small amount (1*10^-10^) so that all values are non-zero. This affects -the result, but only minimally. If your data is non-zero, or you prefer to see `NaN` when zero's are encountered, -you can disable this behavior with `pad: false` -====== - -===== "Cold Start" - -Unfortunately, due to the nature of Holt-Winters, it requires two periods of data to "bootstrap" the algorithm. This -means that your `window` must always be *at least* twice the size of your period. An exception will be thrown if it -isn't. It also means that Holt-Winters will not emit a value for the first `2 * period` buckets; the current algorithm -does not backcast. - -You'll notice in the above example we have an `if ()` statement checking the size of values. This is checking to make sure -we have two periods worth of data (`5 * 2`, where 5 is the period specified in the `holtWintersMovAvg` function) before calling -the holt-winters function. diff --git a/docs/reference/aggregations/pipeline/moving-percentiles-aggregation.asciidoc b/docs/reference/aggregations/pipeline/moving-percentiles-aggregation.asciidoc deleted file mode 100644 index e4538a90e8a06..0000000000000 --- a/docs/reference/aggregations/pipeline/moving-percentiles-aggregation.asciidoc +++ /dev/null @@ -1,164 +0,0 @@ -[role="xpack"] -[[search-aggregations-pipeline-moving-percentiles-aggregation]] -=== Moving percentiles aggregation -++++ -Moving percentiles -++++ - -Given an ordered series of <>, the Moving Percentile aggregation -will slide a window across those percentiles and allow the user to compute the cumulative percentile. - -This is conceptually very similar to the <> pipeline aggregation, -except it works on the percentiles sketches instead of the actual buckets values. - -==== Syntax - -A `moving_percentiles` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "moving_percentiles": { - "buckets_path": "the_percentile", - "window": 10 - } -} --------------------------------------------------- -// NOTCONSOLE - -[[moving-percentiles-params]] -.`moving_percentiles` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |Path to the percentile of interest (see <> for more details |Required | -|`window` |The size of window to "slide" across the histogram. |Required | -|`shift` |<> of window position. |Optional | 0 -|=== - -`moving_percentiles` aggregations must be embedded inside of a `histogram` or `date_histogram` aggregation. They can be -embedded like any other metric aggregation: - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { <1> - "date_histogram": { - "field": "date", - "calendar_interval": "1M" - }, - "aggs": { - "the_percentile": { <2> - "percentiles": { - "field": "price", - "percents": [ 1.0, 99.0 ] - } - }, - "the_movperc": { - "moving_percentiles": { - "buckets_path": "the_percentile", <3> - "window": 10 - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> A `date_histogram` named "my_date_histo" is constructed on the "timestamp" field, with one-day intervals -<2> A `percentile` metric is used to calculate the percentiles of a field. -<3> Finally, we specify a `moving_percentiles` aggregation which uses "the_percentile" sketch as its input. 
- -Moving percentiles are built by first specifying a `histogram` or `date_histogram` over a field. You then add -a percentile metric inside of that histogram. Finally, the `moving_percentiles` is embedded inside the histogram. -The `buckets_path` parameter is then used to "point" at the percentiles aggregation inside of the histogram (see -<> for a description of the syntax for `buckets_path`). - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "my_date_histo": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "the_percentile": { - "values": { - "1.0": 151.0, - "99.0": 200.0 - } - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "the_percentile": { - "values": { - "1.0": 10.4, - "99.0": 49.6 - } - }, - "the_movperc": { - "values": { - "1.0": 151.0, - "99.0": 200.0 - } - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "the_percentile": { - "values": { - "1.0": 175.25, - "99.0": 199.75 - } - }, - "the_movperc": { - "values": { - "1.0": 11.6, - "99.0": 200.0 - } - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -The output format of the `moving_percentiles` aggregation is inherited from the format of the referenced -<> aggregation. - -Moving percentiles pipeline aggregations always run with `skip` gap policy. - - -[[moving-percentiles-shift-parameter]] -==== shift parameter - -By default (with `shift = 0`), the window that is offered for calculation is the last `n` values excluding the current bucket. -Increasing `shift` by 1 moves starting window position by `1` to the right. - -- To include current bucket to the window, use `shift = 1`. -- For center alignment (`n / 2` values before and after the current bucket), use `shift = window / 2`. -- For right alignment (`n` values after the current bucket), use `shift = window`. - -If either of window edges moves outside the borders of data series, the window shrinks to include available values only. diff --git a/docs/reference/aggregations/pipeline/normalize-aggregation.asciidoc b/docs/reference/aggregations/pipeline/normalize-aggregation.asciidoc deleted file mode 100644 index 8989b2c66c7f2..0000000000000 --- a/docs/reference/aggregations/pipeline/normalize-aggregation.asciidoc +++ /dev/null @@ -1,186 +0,0 @@ -[role="xpack"] -[[search-aggregations-pipeline-normalize-aggregation]] -=== Normalize aggregation -++++ -Normalize -++++ - -A parent pipeline aggregation which calculates the specific normalized/rescaled value for a specific bucket value. -Values that cannot be normalized, will be skipped using the <>. 
- -==== Syntax - -A `normalize` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "normalize": { - "buckets_path": "normalized", - "method": "percent_of_sum" - } -} --------------------------------------------------- -// NOTCONSOLE - -[[normalize_pipeline-params]] -.`normalize_pipeline` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |The path to the buckets we wish to normalize (see <> for more details) |Required | -|`method` | The specific <> to apply | Required | -|`format` |{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. If specified, the formatted value is returned in the aggregation's -`value_as_string` property |Optional |`null` -|=== - -==== Methods -[[normalize_pipeline-method]] - -The Normalize Aggregation supports multiple methods to transform the bucket values. Each method definition will use -the following original set of bucket values as examples: `[5, 5, 10, 50, 10, 20]`. - -_rescale_0_1_:: - This method rescales the data such that the minimum number is zero, and the maximum number is 1, with the rest normalized - linearly in-between. - - x' = (x - min_x) / (max_x - min_x) - - [0, 0, .1111, 1, .1111, .3333] - -_rescale_0_100_:: - This method rescales the data such that the minimum number is zero, and the maximum number is 100, with the rest normalized - linearly in-between. - - x' = 100 * (x - min_x) / (max_x - min_x) - - [0, 0, 11.11, 100, 11.11, 33.33] - -_percent_of_sum_:: - This method normalizes each value so that it represents a percentage of the total sum it attributes to. - - x' = x / sum_x - - [5%, 5%, 10%, 50%, 10%, 20%] - - -_mean_:: - This method normalizes such that each value is normalized by how much it differs from the average. - - x' = (x - mean_x) / (max_x - min_x) - - [4.63, 4.63, 9.63, 49.63, 9.63, 9.63, 19.63] - -_z-score_:: - This method normalizes such that each value represents how far it is from the mean relative to the standard deviation - - x' = (x - mean_x) / stdev_x - - [-0.68, -0.68, -0.39, 1.94, -0.39, 0.19] - -_softmax_:: - This method normalizes such that each value is exponentiated and relative to the sum of the exponents of the original values. - - x' = e^x / sum_e_x - - [2.862E-20, 2.862E-20, 4.248E-18, 0.999, 9.357E-14, 4.248E-18] - - -==== Example - -The following snippet calculates the percent of total sales for each month: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - }, - "percent_of_total_sales": { - "normalize": { - "buckets_path": "sales", <1> - "method": "percent_of_sum", <2> - "format": "00.00%" <3> - } - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `buckets_path` instructs this normalize aggregation to use the output of the `sales` aggregation for rescaling -<2> `method` sets which rescaling to apply. In this case, `percent_of_sum` will calculate the sales value as a percent of all sales - in the parent bucket -<3> `format` influences how to format the metric as a string using Java's `DecimalFormat` pattern. 
In this case, multiplying by 100 - and adding a '%' - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550.0 - }, - "percent_of_total_sales": { - "value": 0.5583756345177665, - "value_as_string": "55.84%" - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60.0 - }, - "percent_of_total_sales": { - "value": 0.06091370558375635, - "value_as_string": "06.09%" - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "sales": { - "value": 375.0 - }, - "percent_of_total_sales": { - "value": 0.38071065989847713, - "value_as_string": "38.07%" - } - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] diff --git a/docs/reference/aggregations/pipeline/percentiles-bucket-aggregation.asciidoc b/docs/reference/aggregations/pipeline/percentiles-bucket-aggregation.asciidoc deleted file mode 100644 index d5bd868258081..0000000000000 --- a/docs/reference/aggregations/pipeline/percentiles-bucket-aggregation.asciidoc +++ /dev/null @@ -1,137 +0,0 @@ -[[search-aggregations-pipeline-percentiles-bucket-aggregation]] -=== Percentiles bucket aggregation -++++ -Percentiles bucket -++++ - -A sibling pipeline aggregation which calculates percentiles across all bucket of a specified metric in a sibling aggregation. -The specified metric must be numeric and the sibling aggregation must be a multi-bucket aggregation. - -==== Syntax - -A `percentiles_bucket` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "percentiles_bucket": { - "buckets_path": "the_sum" - } -} --------------------------------------------------- -// NOTCONSOLE - -[[percentiles-bucket-params]] -.`percentiles_bucket` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |The path to the buckets we wish to find the percentiles for (see <> for more - details) |Required | -|`gap_policy` |The policy to apply when gaps are found in the data (see <> for more - details)|Optional | `skip` -|`format` |{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. 
If specified, the formatted value is returned in the aggregation's -`value_as_string` property |Optional | `null` -|`percents` |The list of percentiles to calculate |Optional | `[ 1, 5, 25, 50, 75, 95, 99 ]` -|`keyed` |Flag which returns the range as an hash instead of an array of key-value pairs |Optional | `true` -|=== - -The following snippet calculates the percentiles for the total monthly `sales` buckets: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - } - } - }, - "percentiles_monthly_sales": { - "percentiles_bucket": { - "buckets_path": "sales_per_month>sales", <1> - "percents": [ 25.0, 50.0, 75.0 ] <2> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `buckets_path` instructs this percentiles_bucket aggregation that we want to calculate percentiles for -the `sales` aggregation in the `sales_per_month` date histogram. -<2> `percents` specifies which percentiles we wish to calculate, in this case, the 25th, 50th and 75th percentiles. - -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550.0 - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60.0 - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "sales": { - "value": 375.0 - } - } - ] - }, - "percentiles_monthly_sales": { - "values" : { - "25.0": 375.0, - "50.0": 375.0, - "75.0": 550.0 - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] - -==== Percentiles_bucket implementation - -The percentiles are calculated exactly and is not an approximation (unlike the Percentiles Metric). This means -the implementation maintains an in-memory, sorted list of your data to compute the percentiles, before discarding the -data. You may run into memory pressure issues if you attempt to calculate percentiles over many millions of -data-points in a single `percentiles_bucket`. - -The Percentile Bucket returns the nearest input data point to the requested percentile, rounding indices toward -positive infinity; it does not interpolate between data points. For example, if there are eight data points and -you request the `50%th` percentile, it will return the `4th` item because `ROUND_UP(.50 * (8-1))` is `4`. diff --git a/docs/reference/aggregations/pipeline/serial-diff-aggregation.asciidoc b/docs/reference/aggregations/pipeline/serial-diff-aggregation.asciidoc deleted file mode 100644 index 432bd08cbe288..0000000000000 --- a/docs/reference/aggregations/pipeline/serial-diff-aggregation.asciidoc +++ /dev/null @@ -1,104 +0,0 @@ -[[search-aggregations-pipeline-serialdiff-aggregation]] -=== Serial differencing aggregation -++++ -Serial differencing -++++ - -Serial differencing is a technique where values in a time series are subtracted from itself at -different time lags or periods. 
For example, the datapoint f(x) = f(x~t~) - f(x~t-n~), where n is the period being used. - -A period of 1 is equivalent to a derivative with no time normalization: it is simply the change from one point to the -next. Single periods are useful for removing constant, linear trends. - -Single periods are also useful for transforming data into a stationary series. In this example, the Dow Jones is -plotted over ~250 days. The raw data is not stationary, which would make it difficult to use with some techniques. - -By calculating the first-difference, we de-trend the data (e.g. remove a constant, linear trend). We can see that the -data becomes a stationary series (e.g. the first difference is randomly distributed around zero, and doesn't seem to -exhibit any pattern/behavior). The transformation reveals that the dataset is following a random-walk; the value is the -previous value +/- a random amount. This insight allows selection of further tools for analysis. - -[[serialdiff_dow]] -.Dow Jones plotted and made stationary with first-differencing -image::images/pipeline_serialdiff/dow.png[] - -Larger periods can be used to remove seasonal / cyclic behavior. In this example, a population of lemmings was -synthetically generated with a sine wave + constant linear trend + random noise. The sine wave has a period of 30 days. - -The first-difference removes the constant trend, leaving just a sine wave. The 30th-difference is then applied to the -first-difference to remove the cyclic behavior, leaving a stationary series which is amenable to other analysis. - -[[serialdiff_lemmings]] -.Lemmings data plotted made stationary with 1st and 30th difference -image::images/pipeline_serialdiff/lemmings.png[] - - - -==== Syntax - -A `serial_diff` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "serial_diff": { - "buckets_path": "the_sum", - "lag": 7 - } -} --------------------------------------------------- -// NOTCONSOLE - -[[serial-diff-params]] -.`serial_diff` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |Path to the metric of interest (see <> for more details |Required | -|`lag` |The historical bucket to subtract from the current value. E.g. a lag of 7 will subtract the current value from - the value 7 buckets ago. Must be a positive, non-zero integer |Optional |`1` -|`gap_policy` |Determines what should happen when a gap in the data is encountered. |Optional |`insert_zeros` -|`format` |{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. If specified, the formatted value is returned in the aggregation's -`value_as_string` property |Optional | `null` -|=== - -`serial_diff` aggregations must be embedded inside of a `histogram` or `date_histogram` aggregation: - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": 0, - "aggs": { - "my_date_histo": { <1> - "date_histogram": { - "field": "timestamp", - "calendar_interval": "day" - }, - "aggs": { - "the_sum": { - "sum": { - "field": "lemmings" <2> - } - }, - "thirtieth_difference": { - "serial_diff": { <3> - "buckets_path": "the_sum", - "lag" : 30 - } - } - } - } - } -} --------------------------------------------------- - -<1> A `date_histogram` named "my_date_histo" is constructed on the "timestamp" field, with one-day intervals -<2> A `sum` metric is used to calculate the sum of a field. 
This could be any metric (sum, min, max, etc) -<3> Finally, we specify a `serial_diff` aggregation which uses "the_sum" metric as its input. - -Serial differences are built by first specifying a `histogram` or `date_histogram` over a field. You can then optionally -add normal metrics, such as a `sum`, inside of that histogram. Finally, the `serial_diff` is embedded inside the histogram. -The `buckets_path` parameter is then used to "point" at one of the sibling metrics inside of the histogram (see -<> for a description of the syntax for `buckets_path`. diff --git a/docs/reference/aggregations/pipeline/stats-bucket-aggregation.asciidoc b/docs/reference/aggregations/pipeline/stats-bucket-aggregation.asciidoc deleted file mode 100644 index 205c93cf98103..0000000000000 --- a/docs/reference/aggregations/pipeline/stats-bucket-aggregation.asciidoc +++ /dev/null @@ -1,122 +0,0 @@ -[[search-aggregations-pipeline-stats-bucket-aggregation]] -=== Stats bucket aggregation -++++ -Stats bucket -++++ - -A sibling pipeline aggregation which calculates a variety of stats across all bucket of a specified metric in a sibling aggregation. -The specified metric must be numeric and the sibling aggregation must be a multi-bucket aggregation. - -==== Syntax - -A `stats_bucket` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "stats_bucket": { - "buckets_path": "the_sum" - } -} --------------------------------------------------- -// NOTCONSOLE - -[[stats-bucket-params]] -.`stats_bucket` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |The path to the buckets we wish to calculate stats for (see <> for more - details) |Required | -|`gap_policy` |The policy to apply when gaps are found in the data (see <> for more - details)|Optional | `skip` -|`format` |{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. If specified, the formatted value is returned in the aggregation's -`value_as_string` property|Optional | `null` -|=== - -The following snippet calculates the stats for monthly `sales`: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - } - } - }, - "stats_monthly_sales": { - "stats_bucket": { - "buckets_path": "sales_per_month>sales" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `bucket_paths` instructs this `stats_bucket` aggregation that we want the calculate stats for the `sales` aggregation in the -`sales_per_month` date histogram. 
- -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550.0 - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60.0 - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "sales": { - "value": 375.0 - } - } - ] - }, - "stats_monthly_sales": { - "count": 3, - "min": 60.0, - "max": 550.0, - "avg": 328.3333333333333, - "sum": 985.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] diff --git a/docs/reference/aggregations/pipeline/sum-bucket-aggregation.asciidoc b/docs/reference/aggregations/pipeline/sum-bucket-aggregation.asciidoc deleted file mode 100644 index 5b0b38030291c..0000000000000 --- a/docs/reference/aggregations/pipeline/sum-bucket-aggregation.asciidoc +++ /dev/null @@ -1,119 +0,0 @@ -[[search-aggregations-pipeline-sum-bucket-aggregation]] -=== Sum bucket aggregation -++++ -Sum bucket -++++ - - -A sibling pipeline aggregation which calculates the sum across all buckets of a specified metric in a sibling aggregation. -The specified metric must be numeric and the sibling aggregation must be a multi-bucket aggregation. - -==== Syntax - -A `sum_bucket` aggregation looks like this in isolation: - -[source,js] --------------------------------------------------- -{ - "sum_bucket": { - "buckets_path": "the_sum" - } -} --------------------------------------------------- -// NOTCONSOLE - -[[sum-bucket-params]] -.`sum_bucket` Parameters -[options="header"] -|=== -|Parameter Name |Description |Required |Default Value -|`buckets_path` |The path to the buckets we wish to find the sum for (see <> for more - details) |Required | - |`gap_policy` |The policy to apply when gaps are found in the data (see <> for more - details)|Optional | `skip` - |`format` |{javadoc}/java.base/java/text/DecimalFormat.html[DecimalFormat pattern] for the -output value. If specified, the formatted value is returned in the aggregation's -`value_as_string` property. |Optional |`null` -|=== - -The following snippet calculates the sum of all the total monthly `sales` buckets: - -[source,console] --------------------------------------------------- -POST /sales/_search -{ - "size": 0, - "aggs": { - "sales_per_month": { - "date_histogram": { - "field": "date", - "calendar_interval": "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - } - } - }, - "sum_monthly_sales": { - "sum_bucket": { - "buckets_path": "sales_per_month>sales" <1> - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -<1> `buckets_path` instructs this sum_bucket aggregation that we want the sum of the `sales` aggregation in the -`sales_per_month` date histogram. 
- -And the following may be the response: - -[source,console-result] --------------------------------------------------- -{ - "took": 11, - "timed_out": false, - "_shards": ..., - "hits": ..., - "aggregations": { - "sales_per_month": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550.0 - } - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60.0 - } - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "sales": { - "value": 375.0 - } - } - ] - }, - "sum_monthly_sales": { - "value": 985.0 - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 11/"took": $body.took/] -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/] diff --git a/docs/reference/alias.asciidoc b/docs/reference/alias.asciidoc deleted file mode 100644 index 3f8553c3b96d9..0000000000000 --- a/docs/reference/alias.asciidoc +++ /dev/null @@ -1,432 +0,0 @@ -[chapter] -[[aliases]] -= Aliases - -An alias points to one or more indices or data streams. Most {es} -APIs accept an alias in place of a data stream or index name. - -Aliases enable you to: - -* Query multiple indices/data streams together with a single name -* Change which indices/data streams your application uses in real time -* <> data without downtime - -[discrete] -[[alias-types]] -=== Alias types - -There are two types of aliases: - -* A **data stream alias** points to one or more data streams. -* An **index alias** points to one or more indices. - -An alias cannot point to both data streams and indices. You also cannot add a -data stream's backing index to an index alias. - -[discrete] -[[add-alias]] -=== Add an alias - -To add an existing data stream or index to an alias, use the -<>'s `add` action. If the alias doesn't exist, the -request creates it. - -[source,console] ----- -POST _aliases -{ - "actions": [ - { - "add": { - "index": "logs-nginx.access-prod", - "alias": "logs" - } - } - ] -} ----- -// TEST[s/^/PUT _data_stream\/logs-nginx.access-prod\n/] - -The API's `index` and `indices` parameters support wildcards (`*`). Wildcard -patterns that match both data streams and indices return an error. - -[source,console] ----- -POST _aliases -{ - "actions": [ - { - "add": { - "index": "logs-*", - "alias": "logs" - } - } - ] -} ----- -// TEST[s/^/PUT _data_stream\/logs-nginx.access-prod\n/] - -[discrete] -[[remove-alias]] -=== Remove an alias - -To remove an alias, use the aliases API's `remove` action. - -[source,console] ----- -POST _aliases -{ - "actions": [ - { - "remove": { - "index": "logs-nginx.access-prod", - "alias": "logs" - } - } - ] -} ----- -// TEST[continued] - -[discrete] -[[multiple-actions]] -=== Multiple actions - -You can use the aliases API to perform multiple actions in a single atomic -operation. - -// tag::alias-multiple-actions-example[] -For example, the `logs` alias points to a single data stream. The following -request swaps the stream for the alias. During this swap, the `logs` alias has -no downtime and never points to both streams at the same time. 
- -[source,console] ----- -POST _aliases -{ - "actions": [ - { - "remove": { - "index": "logs-nginx.access-prod", - "alias": "logs" - } - }, - { - "add": { - "index": "logs-my_app-default", - "alias": "logs" - } - } - ] -} ----- -// TEST[s/^/PUT _data_stream\/logs-nginx.access-prod\nPUT _data_stream\/logs-my_app-default\n/] -// end::alias-multiple-actions-example[] - -[discrete] -[[multiple-action-results]] -=== Multiple action results - -When using multiple actions, if some succeed and some fail, a list of per-action results will be returned. - -Consider a similar action list to the previous example, but now with an alias `log-non-existing`, which does not yet exist. -In this case, the `remove` action will fail, but the `add` action will succeed. -The response will contain the list `action_results`, with a result for every requested action. - -[source,console] ----- -POST _aliases -{ - "actions": [ - { - "remove": { - "index": "index1", - "alias": "logs-non-existing" - } - }, - { - "add": { - "index": "index2", - "alias": "logs-non-existing" - } - } - ] -} ----- -// TEST[s/^/PUT \/index1\nPUT \/index2\n/] - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged": true, - "errors": true, - "action_results": [ - { - "action": { - "type": "remove", - "indices": [ "index1" ], - "aliases": [ "logs-non-existing" ], - }, - "status": 404, - "error": { - "type": "aliases_not_found_exception", - "reason": "aliases [logs-non-existing] missing", - "resource.type": "aliases", - "resource.id": "logs-non-existing" - } - }, - { - "action": { - "type": "add", - "indices": [ "index2" ], - "aliases": [ "logs-non-existing" ], - }, - "status": 200 - } - ] -} --------------------------------------------------- - -Allowing the action list to succeed partially may not provide the desired result. -It may be more appropriate to set `must_exist` to `true`, which will cause the entire action -list to fail if a single action fails. - - -[discrete] -[[add-alias-at-creation]] -=== Add an alias at index creation - -You can also use a <> or -<> to add index or data stream aliases -when they are created. - -[source,console] ----- -# Component template with index aliases -PUT _component_template/my-aliases -{ - "template": { - "aliases": { - "my-alias": {} - } - } -} - -# Index template with index aliases -PUT _index_template/my-index-template -{ - "index_patterns": [ - "my-index-*" - ], - "composed_of": [ - "my-aliases", - "my-mappings", - "my-settings" - ], - "template": { - "aliases": { - "yet-another-alias": {} - } - } -} ----- -// TEST[s/,\n "my-mappings",\n "my-settings"//] -// TEST[teardown:data_stream_cleanup] - -You can also specify index aliases in <> -requests. - -[source,console] ----- -# PUT -PUT %3Cmy-index-%7Bnow%2Fd%7D-000001%3E -{ - "aliases": { - "my-alias": {} - } -} ----- - -[discrete] -[[view-aliases]] -=== View aliases - -To get a list of your cluster's aliases, use the <> with no argument. - -[source,console] ----- -GET _alias ----- -// TEST[s/^/PUT _data_stream\/logs-nginx.access-prod\nPUT logs-nginx.access-prod\/_alias\/logs\n/] - -Specify a data stream or index before `_alias` to view its aliases. - -[source,console] ----- -GET my-data-stream/_alias ----- -// TEST[s/^/PUT _data_stream\/logs-nginx.access-prod\nPUT logs-nginx.access-prod\/_alias\/logs\n/] -// TEST[s/my-data-stream/logs-nginx.access-prod/] - -Specify an alias after `_alias` to view its data streams or indices. 
- -[source,console] ----- -GET _alias/logs ----- -// TEST[s/^/PUT _data_stream\/logs-nginx.access-prod\nPUT logs-nginx.access-prod\/_alias\/logs\n/] - -[discrete] -[[write-index]] -=== Write index - -You can use `is_write_index` to specify a write index or data stream for an -alias. {es} routes any write requests for the alias to this index or data -stream. - -[source,console] ----- -POST _aliases -{ - "actions": [ - { - "add": { - "index": "logs-nginx.access-prod", - "alias": "logs" - } - }, - { - "add": { - "index": "logs-my_app-default", - "alias": "logs", - "is_write_index": true - } - } - ] -} ----- -// TEST[s/^/PUT _data_stream\/logs-nginx.access-prod\nPUT _data_stream\/logs-my_app-default\n/] - -include::{es-ref-dir}/indices/aliases.asciidoc[tag=write-index-defaults] - -TIP: We recommend using data streams to store append-only time series data. If -you need to update or delete existing time series data, you can perform update or delete operations -directly on the data stream backing index. If you frequently send multiple documents using the same -`_id` expecting last-write-wins, you may want to use an index alias with a write index instead. See -<>. - -[discrete] -[[filter-alias]] -=== Filter an alias - -The `filter` option uses <> to limit the documents an alias -can access. - -[source,console] ----- -POST _aliases -{ - "actions": [ - { - "add": { - "index": "my-index-2099.05.06-000001", - "alias": "my-alias", - "filter": { - "bool": { - "filter": [ - { - "range": { - "@timestamp": { - "gte": "now-1d/d", - "lt": "now/d" - } - } - }, - { - "term": { - "user.id": "kimchy" - } - } - ] - } - } - } - } - ] -} ----- -// TEST[s/^/PUT my-index-2099.05.06-000001\n/] - -NOTE: Filters are only applied when using the <>, and are not applied when <>. - -[discrete] -[[alias-routing]] -=== Routing - -Use the `routing` option to <> requests for an -alias to a specific shard. This lets you take advantage of -<> to speed up searches. Data stream aliases -do not support routing options. - -[source,console] ----- -POST _aliases -{ - "actions": [ - { - "add": { - "index": "my-index-2099.05.06-000001", - "alias": "my-alias", - "routing": "1" - } - } - ] -} ----- -// TEST[s/^/PUT my-index-2099.05.06-000001\n/] - -Use `index_routing` and `search_routing` to specify different routing values for -indexing and search. If specified, these options overwrite the `routing` value -for their respective operations. - -[source,console] ----- -POST _aliases -{ - "actions": [ - { - "add": { - "index": "my-index-2099.05.06-000001", - "alias": "my-alias", - "search_routing": "1", - "index_routing": "2" - } - } - ] -} ----- -// TEST[s/^/PUT my-index-2099.05.06-000001\n/] - -[discrete] -[[remove-index]] -=== Remove an index - -To remove an index, use the aliases API's `remove_index` action. 
- -[source,console] ----- -POST _aliases -{ - "actions": [ - { - "remove_index": { - "index": "my-index-2099.05.06-000001" - } - } - ] -} ----- -// TEST[s/^/PUT my-index-2099.05.06-000001\n/] diff --git a/docs/reference/analysis.asciidoc b/docs/reference/analysis.asciidoc deleted file mode 100644 index e8fbc3bd81b6d..0000000000000 --- a/docs/reference/analysis.asciidoc +++ /dev/null @@ -1,61 +0,0 @@ -[[analysis]] -= Text analysis - -:lucene-analysis-docs: https://lucene.apache.org/core/{lucene_version_path}/analysis/common/org/apache/lucene/analysis -:lucene-gh-main-link: https://github.com/apache/lucene/blob/main/lucene -:lucene-stop-word-link: {lucene-gh-main-link}/analysis/common/src/resources/org/apache/lucene/analysis - -[partintro] --- - -_Text analysis_ is the process of converting unstructured text, like -the body of an email or a product description, into a structured format that's <>. - -[discrete] -[[when-to-configure-analysis]] -=== When to configure text analysis - -{es} performs text analysis when indexing or searching <> fields. - -If your index doesn't contain `text` fields, no further setup is needed; you can -skip the pages in this section. - -However, if you use `text` fields or your text searches aren't returning results -as expected, configuring text analysis can often help. You should also look into -analysis configuration if you're using {es} to: - -* Build a search engine -* Mine unstructured data -* Fine-tune search for a specific language -* Perform lexicographic or linguistic research - -[discrete] -[[analysis-toc]] -=== In this section - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - --- - -include::analysis/overview.asciidoc[] - -include::analysis/concepts.asciidoc[] - -include::analysis/configure-text-analysis.asciidoc[] - -include::analysis/analyzers.asciidoc[] - -include::analysis/tokenizers.asciidoc[] - -include::analysis/tokenfilters.asciidoc[] - -include::analysis/charfilters.asciidoc[] - -include::analysis/normalizers.asciidoc[] diff --git a/docs/reference/analysis/analyzers.asciidoc b/docs/reference/analysis/analyzers.asciidoc deleted file mode 100644 index 1aacbf62a6d68..0000000000000 --- a/docs/reference/analysis/analyzers.asciidoc +++ /dev/null @@ -1,71 +0,0 @@ -[[analysis-analyzers]] -== Built-in analyzer reference - -Elasticsearch ships with a wide range of built-in analyzers, which can be used -in any index without further configuration: - -<>:: - -The `standard` analyzer divides text into terms on word boundaries, as defined -by the Unicode Text Segmentation algorithm. It removes most punctuation, -lowercases terms, and supports removing stop words. - -<>:: - -The `simple` analyzer divides text into terms whenever it encounters a -character which is not a letter. It lowercases all terms. - -<>:: - -The `whitespace` analyzer divides text into terms whenever it encounters any -whitespace character. It does not lowercase terms. - -<>:: - -The `stop` analyzer is like the `simple` analyzer, but also supports removal -of stop words. - -<>:: - -The `keyword` analyzer is a ``noop'' analyzer that accepts whatever text it is -given and outputs the exact same text as a single term. - -<>:: - -The `pattern` analyzer uses a regular expression to split the text into terms. -It supports lower-casing and stop words. - -<>:: - -Elasticsearch provides many language-specific analyzers like `english` or -`french`. - -<>:: - -The `fingerprint` analyzer is a specialist analyzer which creates a -fingerprint which can be used for duplicate detection. 
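As a quick, self-contained illustration (the analyzer and text here are arbitrary), any of the analyzers above can be tried out with the `_analyze` API before it is mapped to a field:

[source,console]
---------------------------
POST _analyze
{
  "analyzer": "standard",
  "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
}
---------------------------

The response lists the terms the chosen analyzer produces, which makes it easy to compare the built-in analyzers on your own text.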
- -[discrete] -=== Custom analyzers - -If you do not find an analyzer suitable for your needs, you can create a -<> analyzer which combines the appropriate -<>, -<>, and <>. - - -include::analyzers/fingerprint-analyzer.asciidoc[] - -include::analyzers/keyword-analyzer.asciidoc[] - -include::analyzers/lang-analyzer.asciidoc[] - -include::analyzers/pattern-analyzer.asciidoc[] - -include::analyzers/simple-analyzer.asciidoc[] - -include::analyzers/standard-analyzer.asciidoc[] - -include::analyzers/stop-analyzer.asciidoc[] - -include::analyzers/whitespace-analyzer.asciidoc[] \ No newline at end of file diff --git a/docs/reference/analysis/analyzers/configuring.asciidoc b/docs/reference/analysis/analyzers/configuring.asciidoc deleted file mode 100644 index 0a9b682a71214..0000000000000 --- a/docs/reference/analysis/analyzers/configuring.asciidoc +++ /dev/null @@ -1,94 +0,0 @@ -[[configuring-analyzers]] -=== Configuring built-in analyzers - -The built-in analyzers can be used directly without any configuration. Some -of them, however, support configuration options to alter their behaviour. For -instance, the <> can be configured -to support a list of stop words: - -[source,console] --------------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "std_english": { <1> - "type": "standard", - "stopwords": "_english_" - } - } - } - }, - "mappings": { - "properties": { - "my_text": { - "type": "text", - "analyzer": "standard", <2> - "fields": { - "english": { - "type": "text", - "analyzer": "std_english" <3> - } - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "field": "my_text", <2> - "text": "The old brown cow" -} - -POST my-index-000001/_analyze -{ - "field": "my_text.english", <3> - "text": "The old brown cow" -} - --------------------------------- - -<1> We define the `std_english` analyzer to be based on the `standard` - analyzer, but configured to remove the pre-defined list of English stopwords. -<2> The `my_text` field uses the `standard` analyzer directly, without - any configuration. No stop words will be removed from this field. - The resulting terms are: `[ the, old, brown, cow ]` -<3> The `my_text.english` field uses the `std_english` analyzer, so - English stop words will be removed. The resulting terms are: - `[ old, brown, cow ]` - - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "old", - "start_offset": 4, - "end_offset": 7, - "type": "", - "position": 1 - }, - { - "token": "brown", - "start_offset": 8, - "end_offset": 13, - "type": "", - "position": 2 - }, - { - "token": "cow", - "start_offset": 14, - "end_offset": 17, - "type": "", - "position": 3 - } - ] -} ----------------------------- - -///////////////////// diff --git a/docs/reference/analysis/analyzers/custom-analyzer.asciidoc b/docs/reference/analysis/analyzers/custom-analyzer.asciidoc deleted file mode 100644 index f2808d4c4ff04..0000000000000 --- a/docs/reference/analysis/analyzers/custom-analyzer.asciidoc +++ /dev/null @@ -1,261 +0,0 @@ -[[analysis-custom-analyzer]] -=== Create a custom analyzer - -When the built-in analyzers do not fulfill your needs, you can create a -`custom` analyzer which uses the appropriate combination of: - -* zero or more <> -* a <> -* zero or more <>. - -[discrete] -=== Configuration - -The `custom` analyzer accepts the following parameters: - -[horizontal] -`type`:: - Analyzer type. Accepts <>. For - custom analyzers, use `custom` or omit this parameter. 
- -`tokenizer`:: - - A built-in or customised <>. - (Required) - -`char_filter`:: - - An optional array of built-in or customised - <>. - -`filter`:: - - An optional array of built-in or customised - <>. - -`position_increment_gap`:: - - When indexing an array of text values, Elasticsearch inserts a fake "gap" - between the last term of one value and the first term of the next value to - ensure that a phrase query doesn't match two terms from different array - elements. Defaults to `100`. See <> for more. - -[discrete] -=== Example configuration - -Here is an example that combines the following: - -Character Filter:: -* <> - -Tokenizer:: -* <> - -Token Filters:: -* <> -* <> - -[source,console] --------------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_custom_analyzer": { - "type": "custom", <1> - "tokenizer": "standard", - "char_filter": [ - "html_strip" - ], - "filter": [ - "lowercase", - "asciifolding" - ] - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_custom_analyzer", - "text": "Is this déjà vu?" -} --------------------------------- - -<1> For `custom` analyzers, use a `type` of `custom` or omit the `type` -parameter. - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "is", - "start_offset": 0, - "end_offset": 2, - "type": "", - "position": 0 - }, - { - "token": "this", - "start_offset": 3, - "end_offset": 7, - "type": "", - "position": 1 - }, - { - "token": "deja", - "start_offset": 11, - "end_offset": 15, - "type": "", - "position": 2 - }, - { - "token": "vu", - "start_offset": 16, - "end_offset": 22, - "type": "", - "position": 3 - } - ] -} ----------------------------- - -///////////////////// - - -The above example produces the following terms: - -[source,text] ---------------------------- -[ is, this, deja, vu ] ---------------------------- - -The previous example used tokenizer, token filters, and character filters with -their default configurations, but it is possible to create configured versions -of each and to use them in a custom analyzer. - -Here is a more complicated example that combines the following: - -Character Filter:: -* <>, configured to replace `:)` with `_happy_` and `:(` with `_sad_` - -Tokenizer:: -* <>, configured to split on punctuation characters - -Token Filters:: -* <> -* <>, configured to use the pre-defined list of English stop words - - -Here is an example: - -[source,console] --------------------------------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_custom_analyzer": { <1> - "char_filter": [ - "emoticons" - ], - "tokenizer": "punctuation", - "filter": [ - "lowercase", - "english_stop" - ] - } - }, - "tokenizer": { - "punctuation": { <2> - "type": "pattern", - "pattern": "[ .,!?]" - } - }, - "char_filter": { - "emoticons": { <3> - "type": "mapping", - "mappings": [ - ":) => _happy_", - ":( => _sad_" - ] - } - }, - "filter": { - "english_stop": { <4> - "type": "stop", - "stopwords": "_english_" - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_custom_analyzer", - "text": "I'm a :) person, and you?" -} --------------------------------------------------- - -<1> Assigns the index a default custom analyzer, `my_custom_analyzer`. This -analyzer uses a custom tokenizer, character filter, and token filter that -are defined later in the request. This analyzer also omits the `type` parameter. -<2> Defines the custom `punctuation` tokenizer. 
-<3> Defines the custom `emoticons` character filter. -<4> Defines the custom `english_stop` token filter. - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "i'm", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "_happy_", - "start_offset": 6, - "end_offset": 8, - "type": "word", - "position": 2 - }, - { - "token": "person", - "start_offset": 9, - "end_offset": 15, - "type": "word", - "position": 3 - }, - { - "token": "you", - "start_offset": 21, - "end_offset": 24, - "type": "word", - "position": 5 - } - ] -} ----------------------------- - -///////////////////// - - -The above example produces the following terms: - -[source,text] ---------------------------- -[ i'm, _happy_, person, you ] ---------------------------- diff --git a/docs/reference/analysis/analyzers/fingerprint-analyzer.asciidoc b/docs/reference/analysis/analyzers/fingerprint-analyzer.asciidoc deleted file mode 100644 index 9c6fc89a1c988..0000000000000 --- a/docs/reference/analysis/analyzers/fingerprint-analyzer.asciidoc +++ /dev/null @@ -1,178 +0,0 @@ -[[analysis-fingerprint-analyzer]] -=== Fingerprint analyzer -++++ -Fingerprint -++++ - -The `fingerprint` analyzer implements a -https://github.com/OpenRefine/OpenRefine/wiki/Clustering-In-Depth#fingerprint[fingerprinting algorithm] -which is used by the OpenRefine project to assist in clustering. - -Input text is lowercased, normalized to remove extended characters, sorted, -deduplicated and concatenated into a single token. If a stopword list is -configured, stop words will also be removed. - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "analyzer": "fingerprint", - "text": "Yes yes, Gödel said this sentence is consistent and." -} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "and consistent godel is said sentence this yes", - "start_offset": 0, - "end_offset": 52, - "type": "fingerprint", - "position": 0 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following single term: - -[source,text] ---------------------------- -[ and consistent godel is said sentence this yes ] ---------------------------- - -[discrete] -=== Configuration - -The `fingerprint` analyzer accepts the following parameters: - -[horizontal] -`separator`:: - - The character to use to concatenate the terms. Defaults to a space. - -`max_output_size`:: - - The maximum token size to emit. Defaults to `255`. Tokens larger than - this size will be discarded. - -`stopwords`:: - - A pre-defined stop words list like `_english_` or an array containing a - list of stop words. Defaults to `_none_`. - -`stopwords_path`:: - - The path to a file containing stop words. - -See the <> for more information -about stop word configuration. - - -[discrete] -=== Example configuration - -In this example, we configure the `fingerprint` analyzer to use the -pre-defined list of English stop words: - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_fingerprint_analyzer": { - "type": "fingerprint", - "stopwords": "_english_" - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_fingerprint_analyzer", - "text": "Yes yes, Gödel said this sentence is consistent and." 
-} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "consistent godel said sentence yes", - "start_offset": 0, - "end_offset": 52, - "type": "fingerprint", - "position": 0 - } - ] -} ----------------------------- - -///////////////////// - - -The above example produces the following term: - -[source,text] ---------------------------- -[ consistent godel said sentence yes ] ---------------------------- - -[discrete] -=== Definition - -The `fingerprint` tokenizer consists of: - -Tokenizer:: -* <> - -Token Filters (in order):: -* <> -* <> -* <> (disabled by default) -* <> - -If you need to customize the `fingerprint` analyzer beyond the configuration -parameters then you need to recreate it as a `custom` analyzer and modify -it, usually by adding token filters. This would recreate the built-in -`fingerprint` analyzer and you can use it as a starting point for further -customization: - -[source,console] ----------------------------------------------------- -PUT /fingerprint_example -{ - "settings": { - "analysis": { - "analyzer": { - "rebuilt_fingerprint": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "asciifolding", - "fingerprint" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: fingerprint_example, first: fingerprint, second: rebuilt_fingerprint}\nendyaml\n/] diff --git a/docs/reference/analysis/analyzers/keyword-analyzer.asciidoc b/docs/reference/analysis/analyzers/keyword-analyzer.asciidoc deleted file mode 100644 index 888376bc46fa1..0000000000000 --- a/docs/reference/analysis/analyzers/keyword-analyzer.asciidoc +++ /dev/null @@ -1,89 +0,0 @@ -[[analysis-keyword-analyzer]] -=== Keyword analyzer -++++ -Keyword -++++ - -The `keyword` analyzer is a ``noop'' analyzer which returns the entire input -string as a single token. - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "analyzer": "keyword", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." -} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone.", - "start_offset": 0, - "end_offset": 56, - "type": "word", - "position": 0 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following single term: - -[source,text] ---------------------------- -[ The 2 QUICK Brown-Foxes jumped over the lazy dog's bone. ] ---------------------------- - -[discrete] -=== Configuration - -The `keyword` analyzer is not configurable. - -[discrete] -=== Definition - -The `keyword` analyzer consists of: - -Tokenizer:: -* <> - -If you need to customize the `keyword` analyzer then you need to -recreate it as a `custom` analyzer and modify it, usually by adding -token filters. 
Usually, you should prefer the -<> when you want strings that are not split -into tokens, but just in case you need it, this would recreate the -built-in `keyword` analyzer and you can use it as a starting point -for further customization: - -[source,console] ----------------------------------------------------- -PUT /keyword_example -{ - "settings": { - "analysis": { - "analyzer": { - "rebuilt_keyword": { - "tokenizer": "keyword", - "filter": [ <1> - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: keyword_example, first: keyword, second: rebuilt_keyword}\nendyaml\n/] - -<1> You'd add any token filters here. diff --git a/docs/reference/analysis/analyzers/lang-analyzer.asciidoc b/docs/reference/analysis/analyzers/lang-analyzer.asciidoc deleted file mode 100644 index 881970787f5a6..0000000000000 --- a/docs/reference/analysis/analyzers/lang-analyzer.asciidoc +++ /dev/null @@ -1,1876 +0,0 @@ -[[analysis-lang-analyzer]] -=== Language analyzers -++++ -Language -++++ - -A set of analyzers aimed at analyzing specific language text. The -following types are supported: -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>, -<>. - -==== Configuring language analyzers - -===== Stopwords - -All analyzers support setting custom `stopwords` either internally in -the config, or by using an external stopwords file by setting -`stopwords_path`. Check <> for -more details. - -[[_excluding_words_from_stemming]] -===== Excluding words from stemming - -The `stem_exclusion` parameter allows you to specify an array -of lowercase words that should not be stemmed. Internally, this -functionality is implemented by adding the -<> -with the `keywords` set to the value of the `stem_exclusion` parameter. - -The following analyzers support setting custom `stem_exclusion` list: -`arabic`, `armenian`, `basque`, `bengali`, `bulgarian`, `catalan`, `czech`, -`dutch`, `english`, `finnish`, `french`, `galician`, -`german`, `hindi`, `hungarian`, `indonesian`, `irish`, `italian`, `latvian`, -`lithuanian`, `norwegian`, `portuguese`, `romanian`, `russian`, `serbian`, -`sorani`, `spanish`, `swedish`, `turkish`. - -==== Reimplementing language analyzers - -The built-in language analyzers can be reimplemented as `custom` analyzers -(as described below) in order to customize their behaviour. - -NOTE: If you do not intend to exclude words from being stemmed (the -equivalent of the `stem_exclusion` parameter above), then you should remove -the `keyword_marker` token filter from the custom analyzer configuration. 
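-
-For comparison with the rebuilt analyzers below, here is a minimal sketch of
-configuring a built-in language analyzer directly with the `stopwords` and
-`stem_exclusion` parameters described above. The index name, analyzer name,
-and word lists are purely illustrative:
-
-[source,console]
-----------------------------------------------------
-PUT /english_config_example
-{
-  "settings": {
-    "analysis": {
-      "analyzer": {
-        "my_english_analyzer": {
-          "type": "english",
-          "stopwords": [ "a", "an", "the" ],      <1>
-          "stem_exclusion": [ "skies", "flies" ]  <2>
-        }
-      }
-    }
-  }
-}
-----------------------------------------------------
-
-<1> Overrides the default `_english_` stop word list with a custom array.
-<2> Lowercase words protected from stemming via the internally added
-    `keyword_marker` token filter.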
- -[[arabic-analyzer]] -===== `arabic` analyzer - -The `arabic` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /arabic_example -{ - "settings": { - "analysis": { - "filter": { - "arabic_stop": { - "type": "stop", - "stopwords": "_arabic_" <1> - }, - "arabic_keywords": { - "type": "keyword_marker", - "keywords": ["مثال"] <2> - }, - "arabic_stemmer": { - "type": "stemmer", - "language": "arabic" - } - }, - "analyzer": { - "rebuilt_arabic": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "decimal_digit", - "arabic_stop", - "arabic_normalization", - "arabic_keywords", - "arabic_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"arabic_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: arabic_example, first: arabic, second: rebuilt_arabic}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[armenian-analyzer]] -===== `armenian` analyzer - -The `armenian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /armenian_example -{ - "settings": { - "analysis": { - "filter": { - "armenian_stop": { - "type": "stop", - "stopwords": "_armenian_" <1> - }, - "armenian_keywords": { - "type": "keyword_marker", - "keywords": ["օրինակ"] <2> - }, - "armenian_stemmer": { - "type": "stemmer", - "language": "armenian" - } - }, - "analyzer": { - "rebuilt_armenian": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "armenian_stop", - "armenian_keywords", - "armenian_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"armenian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: armenian_example, first: armenian, second: rebuilt_armenian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[basque-analyzer]] -===== `basque` analyzer - -The `basque` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /basque_example -{ - "settings": { - "analysis": { - "filter": { - "basque_stop": { - "type": "stop", - "stopwords": "_basque_" <1> - }, - "basque_keywords": { - "type": "keyword_marker", - "keywords": ["Adibidez"] <2> - }, - "basque_stemmer": { - "type": "stemmer", - "language": "basque" - } - }, - "analyzer": { - "rebuilt_basque": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "basque_stop", - "basque_keywords", - "basque_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"basque_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: basque_example, first: basque, second: rebuilt_basque}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. 
- -[[bengali-analyzer]] -===== `bengali` analyzer - -The `bengali` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /bengali_example -{ - "settings": { - "analysis": { - "filter": { - "bengali_stop": { - "type": "stop", - "stopwords": "_bengali_" <1> - }, - "bengali_keywords": { - "type": "keyword_marker", - "keywords": ["উদাহরণ"] <2> - }, - "bengali_stemmer": { - "type": "stemmer", - "language": "bengali" - } - }, - "analyzer": { - "rebuilt_bengali": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "decimal_digit", - "bengali_keywords", - "indic_normalization", - "bengali_normalization", - "bengali_stop", - "bengali_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"bengali_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: bengali_example, first: bengali, second: rebuilt_bengali}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[brazilian-analyzer]] -===== `brazilian` analyzer - -The `brazilian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /brazilian_example -{ - "settings": { - "analysis": { - "filter": { - "brazilian_stop": { - "type": "stop", - "stopwords": "_brazilian_" <1> - }, - "brazilian_keywords": { - "type": "keyword_marker", - "keywords": ["exemplo"] <2> - }, - "brazilian_stemmer": { - "type": "stemmer", - "language": "brazilian" - } - }, - "analyzer": { - "rebuilt_brazilian": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "brazilian_stop", - "brazilian_keywords", - "brazilian_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"brazilian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: brazilian_example, first: brazilian, second: rebuilt_brazilian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[bulgarian-analyzer]] -===== `bulgarian` analyzer - -The `bulgarian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /bulgarian_example -{ - "settings": { - "analysis": { - "filter": { - "bulgarian_stop": { - "type": "stop", - "stopwords": "_bulgarian_" <1> - }, - "bulgarian_keywords": { - "type": "keyword_marker", - "keywords": ["пример"] <2> - }, - "bulgarian_stemmer": { - "type": "stemmer", - "language": "bulgarian" - } - }, - "analyzer": { - "rebuilt_bulgarian": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "bulgarian_stop", - "bulgarian_keywords", - "bulgarian_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"bulgarian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: bulgarian_example, first: bulgarian, second: rebuilt_bulgarian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. 
- -[[catalan-analyzer]] -===== `catalan` analyzer - -The `catalan` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /catalan_example -{ - "settings": { - "analysis": { - "filter": { - "catalan_elision": { - "type": "elision", - "articles": [ "d", "l", "m", "n", "s", "t"], - "articles_case": true - }, - "catalan_stop": { - "type": "stop", - "stopwords": "_catalan_" <1> - }, - "catalan_keywords": { - "type": "keyword_marker", - "keywords": ["example"] <2> - }, - "catalan_stemmer": { - "type": "stemmer", - "language": "catalan" - } - }, - "analyzer": { - "rebuilt_catalan": { - "tokenizer": "standard", - "filter": [ - "catalan_elision", - "lowercase", - "catalan_stop", - "catalan_keywords", - "catalan_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"catalan_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: catalan_example, first: catalan, second: rebuilt_catalan}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[cjk-analyzer]] -===== `cjk` analyzer - -NOTE: You may find that `icu_analyzer` in the ICU analysis plugin works better -for CJK text than the `cjk` analyzer. Experiment with your text and queries. - -The `cjk` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /cjk_example -{ - "settings": { - "analysis": { - "filter": { - "english_stop": { - "type": "stop", - "stopwords": [ <1> - "a", "and", "are", "as", "at", "be", "but", "by", "for", - "if", "in", "into", "is", "it", "no", "not", "of", "on", - "or", "s", "such", "t", "that", "the", "their", "then", - "there", "these", "they", "this", "to", "was", "will", - "with", "www" - ] - } - }, - "analyzer": { - "rebuilt_cjk": { - "tokenizer": "standard", - "filter": [ - "cjk_width", - "lowercase", - "cjk_bigram", - "english_stop" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"cjk_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: cjk_example, first: cjk, second: rebuilt_cjk}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. The default stop words are - *almost* the same as the `_english_` set, but not exactly - the same. - -[[czech-analyzer]] -===== `czech` analyzer - -The `czech` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /czech_example -{ - "settings": { - "analysis": { - "filter": { - "czech_stop": { - "type": "stop", - "stopwords": "_czech_" <1> - }, - "czech_keywords": { - "type": "keyword_marker", - "keywords": ["příklad"] <2> - }, - "czech_stemmer": { - "type": "stemmer", - "language": "czech" - } - }, - "analyzer": { - "rebuilt_czech": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "czech_stop", - "czech_keywords", - "czech_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"czech_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: czech_example, first: czech, second: rebuilt_czech}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. 
-<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[danish-analyzer]] -===== `danish` analyzer - -The `danish` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /danish_example -{ - "settings": { - "analysis": { - "filter": { - "danish_stop": { - "type": "stop", - "stopwords": "_danish_" <1> - }, - "danish_keywords": { - "type": "keyword_marker", - "keywords": ["eksempel"] <2> - }, - "danish_stemmer": { - "type": "stemmer", - "language": "danish" - } - }, - "analyzer": { - "rebuilt_danish": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "danish_stop", - "danish_keywords", - "danish_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"danish_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: danish_example, first: danish, second: rebuilt_danish}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[dutch-analyzer]] -===== `dutch` analyzer - -The `dutch` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /dutch_example -{ - "settings": { - "analysis": { - "filter": { - "dutch_stop": { - "type": "stop", - "stopwords": "_dutch_" <1> - }, - "dutch_keywords": { - "type": "keyword_marker", - "keywords": ["voorbeeld"] <2> - }, - "dutch_stemmer": { - "type": "stemmer", - "language": "dutch" - }, - "dutch_override": { - "type": "stemmer_override", - "rules": [ - "fiets=>fiets", - "bromfiets=>bromfiets", - "ei=>eier", - "kind=>kinder" - ] - } - }, - "analyzer": { - "rebuilt_dutch": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "dutch_stop", - "dutch_keywords", - "dutch_override", - "dutch_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"dutch_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: dutch_example, first: dutch, second: rebuilt_dutch}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. 
- -[[english-analyzer]] -===== `english` analyzer - -The `english` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /english_example -{ - "settings": { - "analysis": { - "filter": { - "english_stop": { - "type": "stop", - "stopwords": "_english_" <1> - }, - "english_keywords": { - "type": "keyword_marker", - "keywords": ["example"] <2> - }, - "english_stemmer": { - "type": "stemmer", - "language": "english" - }, - "english_possessive_stemmer": { - "type": "stemmer", - "language": "possessive_english" - } - }, - "analyzer": { - "rebuilt_english": { - "tokenizer": "standard", - "filter": [ - "english_possessive_stemmer", - "lowercase", - "english_stop", - "english_keywords", - "english_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"english_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: english_example, first: english, second: rebuilt_english}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[estonian-analyzer]] -===== `estonian` analyzer - -The `estonian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /estonian_example -{ - "settings": { - "analysis": { - "filter": { - "estonian_stop": { - "type": "stop", - "stopwords": "_estonian_" <1> - }, - "estonian_keywords": { - "type": "keyword_marker", - "keywords": ["näide"] <2> - }, - "estonian_stemmer": { - "type": "stemmer", - "language": "estonian" - } - }, - "analyzer": { - "rebuilt_estonian": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "estonian_stop", - "estonian_keywords", - "estonian_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"estonian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: estonian_example, first: estonian, second: rebuilt_estonian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[finnish-analyzer]] -===== `finnish` analyzer - -The `finnish` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /finnish_example -{ - "settings": { - "analysis": { - "filter": { - "finnish_stop": { - "type": "stop", - "stopwords": "_finnish_" <1> - }, - "finnish_keywords": { - "type": "keyword_marker", - "keywords": ["esimerkki"] <2> - }, - "finnish_stemmer": { - "type": "stemmer", - "language": "finnish" - } - }, - "analyzer": { - "rebuilt_finnish": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "finnish_stop", - "finnish_keywords", - "finnish_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"finnish_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: finnish_example, first: finnish, second: rebuilt_finnish}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. 
- -[[french-analyzer]] -===== `french` analyzer - -The `french` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /french_example -{ - "settings": { - "analysis": { - "filter": { - "french_elision": { - "type": "elision", - "articles_case": true, - "articles": [ - "l", "m", "t", "qu", "n", "s", - "j", "d", "c", "jusqu", "quoiqu", - "lorsqu", "puisqu" - ] - }, - "french_stop": { - "type": "stop", - "stopwords": "_french_" <1> - }, - "french_keywords": { - "type": "keyword_marker", - "keywords": ["Example"] <2> - }, - "french_stemmer": { - "type": "stemmer", - "language": "light_french" - } - }, - "analyzer": { - "rebuilt_french": { - "tokenizer": "standard", - "filter": [ - "french_elision", - "lowercase", - "french_stop", - "french_keywords", - "french_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"french_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: french_example, first: french, second: rebuilt_french}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[galician-analyzer]] -===== `galician` analyzer - -The `galician` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /galician_example -{ - "settings": { - "analysis": { - "filter": { - "galician_stop": { - "type": "stop", - "stopwords": "_galician_" <1> - }, - "galician_keywords": { - "type": "keyword_marker", - "keywords": ["exemplo"] <2> - }, - "galician_stemmer": { - "type": "stemmer", - "language": "galician" - } - }, - "analyzer": { - "rebuilt_galician": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "galician_stop", - "galician_keywords", - "galician_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"galician_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: galician_example, first: galician, second: rebuilt_galician}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[german-analyzer]] -===== `german` analyzer - -The `german` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /german_example -{ - "settings": { - "analysis": { - "filter": { - "german_stop": { - "type": "stop", - "stopwords": "_german_" <1> - }, - "german_keywords": { - "type": "keyword_marker", - "keywords": ["Beispiel"] <2> - }, - "german_stemmer": { - "type": "stemmer", - "language": "light_german" - } - }, - "analyzer": { - "rebuilt_german": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "german_stop", - "german_keywords", - "german_normalization", - "german_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"german_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: german_example, first: german, second: rebuilt_german}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. 
-<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[greek-analyzer]] -===== `greek` analyzer - -The `greek` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /greek_example -{ - "settings": { - "analysis": { - "filter": { - "greek_stop": { - "type": "stop", - "stopwords": "_greek_" <1> - }, - "greek_lowercase": { - "type": "lowercase", - "language": "greek" - }, - "greek_keywords": { - "type": "keyword_marker", - "keywords": ["παράδειγμα"] <2> - }, - "greek_stemmer": { - "type": "stemmer", - "language": "greek" - } - }, - "analyzer": { - "rebuilt_greek": { - "tokenizer": "standard", - "filter": [ - "greek_lowercase", - "greek_stop", - "greek_keywords", - "greek_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"greek_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: greek_example, first: greek, second: rebuilt_greek}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[hindi-analyzer]] -===== `hindi` analyzer - -The `hindi` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /hindi_example -{ - "settings": { - "analysis": { - "filter": { - "hindi_stop": { - "type": "stop", - "stopwords": "_hindi_" <1> - }, - "hindi_keywords": { - "type": "keyword_marker", - "keywords": ["उदाहरण"] <2> - }, - "hindi_stemmer": { - "type": "stemmer", - "language": "hindi" - } - }, - "analyzer": { - "rebuilt_hindi": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "decimal_digit", - "hindi_keywords", - "indic_normalization", - "hindi_normalization", - "hindi_stop", - "hindi_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"hindi_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: hindi_example, first: hindi, second: rebuilt_hindi}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[hungarian-analyzer]] -===== `hungarian` analyzer - -The `hungarian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /hungarian_example -{ - "settings": { - "analysis": { - "filter": { - "hungarian_stop": { - "type": "stop", - "stopwords": "_hungarian_" <1> - }, - "hungarian_keywords": { - "type": "keyword_marker", - "keywords": ["példa"] <2> - }, - "hungarian_stemmer": { - "type": "stemmer", - "language": "hungarian" - } - }, - "analyzer": { - "rebuilt_hungarian": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "hungarian_stop", - "hungarian_keywords", - "hungarian_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"hungarian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: hungarian_example, first: hungarian, second: rebuilt_hungarian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. 
-<2> This filter should be removed unless there are words which should - be excluded from stemming. - - -[[indonesian-analyzer]] -===== `indonesian` analyzer - -The `indonesian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /indonesian_example -{ - "settings": { - "analysis": { - "filter": { - "indonesian_stop": { - "type": "stop", - "stopwords": "_indonesian_" <1> - }, - "indonesian_keywords": { - "type": "keyword_marker", - "keywords": ["contoh"] <2> - }, - "indonesian_stemmer": { - "type": "stemmer", - "language": "indonesian" - } - }, - "analyzer": { - "rebuilt_indonesian": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "indonesian_stop", - "indonesian_keywords", - "indonesian_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"indonesian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: indonesian_example, first: indonesian, second: rebuilt_indonesian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[irish-analyzer]] -===== `irish` analyzer - -The `irish` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /irish_example -{ - "settings": { - "analysis": { - "filter": { - "irish_hyphenation": { - "type": "stop", - "stopwords": [ "h", "n", "t" ], - "ignore_case": true - }, - "irish_elision": { - "type": "elision", - "articles": [ "d", "m", "b" ], - "articles_case": true - }, - "irish_stop": { - "type": "stop", - "stopwords": "_irish_" <1> - }, - "irish_lowercase": { - "type": "lowercase", - "language": "irish" - }, - "irish_keywords": { - "type": "keyword_marker", - "keywords": ["sampla"] <2> - }, - "irish_stemmer": { - "type": "stemmer", - "language": "irish" - } - }, - "analyzer": { - "rebuilt_irish": { - "tokenizer": "standard", - "filter": [ - "irish_hyphenation", - "irish_elision", - "irish_lowercase", - "irish_stop", - "irish_keywords", - "irish_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"irish_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: irish_example, first: irish, second: rebuilt_irish}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. 
- -[[italian-analyzer]] -===== `italian` analyzer - -The `italian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /italian_example -{ - "settings": { - "analysis": { - "filter": { - "italian_elision": { - "type": "elision", - "articles": [ - "c", "l", "all", "dall", "dell", - "nell", "sull", "coll", "pell", - "gl", "agl", "dagl", "degl", "negl", - "sugl", "un", "m", "t", "s", "v", "d" - ], - "articles_case": true - }, - "italian_stop": { - "type": "stop", - "stopwords": "_italian_" <1> - }, - "italian_keywords": { - "type": "keyword_marker", - "keywords": ["esempio"] <2> - }, - "italian_stemmer": { - "type": "stemmer", - "language": "light_italian" - } - }, - "analyzer": { - "rebuilt_italian": { - "tokenizer": "standard", - "filter": [ - "italian_elision", - "lowercase", - "italian_stop", - "italian_keywords", - "italian_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"italian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: italian_example, first: italian, second: rebuilt_italian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[latvian-analyzer]] -===== `latvian` analyzer - -The `latvian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /latvian_example -{ - "settings": { - "analysis": { - "filter": { - "latvian_stop": { - "type": "stop", - "stopwords": "_latvian_" <1> - }, - "latvian_keywords": { - "type": "keyword_marker", - "keywords": ["piemērs"] <2> - }, - "latvian_stemmer": { - "type": "stemmer", - "language": "latvian" - } - }, - "analyzer": { - "rebuilt_latvian": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "latvian_stop", - "latvian_keywords", - "latvian_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"latvian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: latvian_example, first: latvian, second: rebuilt_latvian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. 
- -[[lithuanian-analyzer]] -===== `lithuanian` analyzer - -The `lithuanian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /lithuanian_example -{ - "settings": { - "analysis": { - "filter": { - "lithuanian_stop": { - "type": "stop", - "stopwords": "_lithuanian_" <1> - }, - "lithuanian_keywords": { - "type": "keyword_marker", - "keywords": ["pavyzdys"] <2> - }, - "lithuanian_stemmer": { - "type": "stemmer", - "language": "lithuanian" - } - }, - "analyzer": { - "rebuilt_lithuanian": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "lithuanian_stop", - "lithuanian_keywords", - "lithuanian_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"lithuanian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: lithuanian_example, first: lithuanian, second: rebuilt_lithuanian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[norwegian-analyzer]] -===== `norwegian` analyzer - -The `norwegian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /norwegian_example -{ - "settings": { - "analysis": { - "filter": { - "norwegian_stop": { - "type": "stop", - "stopwords": "_norwegian_" <1> - }, - "norwegian_keywords": { - "type": "keyword_marker", - "keywords": ["eksempel"] <2> - }, - "norwegian_stemmer": { - "type": "stemmer", - "language": "norwegian" - } - }, - "analyzer": { - "rebuilt_norwegian": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "norwegian_stop", - "norwegian_keywords", - "norwegian_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"norwegian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: norwegian_example, first: norwegian, second: rebuilt_norwegian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[persian-analyzer]] -===== `persian` analyzer - -The `persian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /persian_example -{ - "settings": { - "analysis": { - "char_filter": { - "zero_width_spaces": { - "type": "mapping", - "mappings": [ "\\u200C=>\\u0020"] <1> - } - }, - "filter": { - "persian_stop": { - "type": "stop", - "stopwords": "_persian_" <2> - } - }, - "analyzer": { - "rebuilt_persian": { - "tokenizer": "standard", - "char_filter": [ "zero_width_spaces" ], - "filter": [ - "lowercase", - "decimal_digit", - "arabic_normalization", - "persian_normalization", - "persian_stop", - "persian_stem" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: persian_example, first: persian, second: rebuilt_persian}\nendyaml\n/] - -<1> Replaces zero-width non-joiners with an ASCII space. -<2> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. 
- -[[portuguese-analyzer]] -===== `portuguese` analyzer - -The `portuguese` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /portuguese_example -{ - "settings": { - "analysis": { - "filter": { - "portuguese_stop": { - "type": "stop", - "stopwords": "_portuguese_" <1> - }, - "portuguese_keywords": { - "type": "keyword_marker", - "keywords": ["exemplo"] <2> - }, - "portuguese_stemmer": { - "type": "stemmer", - "language": "light_portuguese" - } - }, - "analyzer": { - "rebuilt_portuguese": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "portuguese_stop", - "portuguese_keywords", - "portuguese_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"portuguese_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: portuguese_example, first: portuguese, second: rebuilt_portuguese}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[romanian-analyzer]] -===== `romanian` analyzer - -The `romanian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /romanian_example -{ - "settings": { - "analysis": { - "filter": { - "romanian_stop": { - "type": "stop", - "stopwords": "_romanian_" <1> - }, - "romanian_keywords": { - "type": "keyword_marker", - "keywords": ["exemplu"] <2> - }, - "romanian_stemmer": { - "type": "stemmer", - "language": "romanian" - } - }, - "analyzer": { - "rebuilt_romanian": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "romanian_stop", - "romanian_keywords", - "romanian_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"romanian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: romanian_example, first: romanian, second: rebuilt_romanian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - - -[[russian-analyzer]] -===== `russian` analyzer - -The `russian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /russian_example -{ - "settings": { - "analysis": { - "filter": { - "russian_stop": { - "type": "stop", - "stopwords": "_russian_" <1> - }, - "russian_keywords": { - "type": "keyword_marker", - "keywords": ["пример"] <2> - }, - "russian_stemmer": { - "type": "stemmer", - "language": "russian" - } - }, - "analyzer": { - "rebuilt_russian": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "russian_stop", - "russian_keywords", - "russian_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"russian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: russian_example, first: russian, second: rebuilt_russian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. 
- -[[serbian-analyzer]] -===== `serbian` analyzer - -The `serbian` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /serbian_example -{ - "settings": { - "analysis": { - "filter": { - "serbian_stop": { - "type": "stop", - "stopwords": "_serbian_" <1> - }, - "serbian_keywords": { - "type": "keyword_marker", - "keywords": ["пример"] <2> - }, - "serbian_stemmer": { - "type": "stemmer", - "language": "serbian" - } - }, - "analyzer": { - "rebuilt_serbian": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "serbian_stop", - "serbian_keywords", - "serbian_stemmer", - "serbian_normalization" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"serbian_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: serbian_example, first: serbian, second: rebuilt_serbian}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` -or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should -be excluded from stemming. - -[[sorani-analyzer]] -===== `sorani` analyzer - -The `sorani` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /sorani_example -{ - "settings": { - "analysis": { - "filter": { - "sorani_stop": { - "type": "stop", - "stopwords": "_sorani_" <1> - }, - "sorani_keywords": { - "type": "keyword_marker", - "keywords": ["mînak"] <2> - }, - "sorani_stemmer": { - "type": "stemmer", - "language": "sorani" - } - }, - "analyzer": { - "rebuilt_sorani": { - "tokenizer": "standard", - "filter": [ - "sorani_normalization", - "lowercase", - "decimal_digit", - "sorani_stop", - "sorani_keywords", - "sorani_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"sorani_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: sorani_example, first: sorani, second: rebuilt_sorani}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[spanish-analyzer]] -===== `spanish` analyzer - -The `spanish` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /spanish_example -{ - "settings": { - "analysis": { - "filter": { - "spanish_stop": { - "type": "stop", - "stopwords": "_spanish_" <1> - }, - "spanish_keywords": { - "type": "keyword_marker", - "keywords": ["ejemplo"] <2> - }, - "spanish_stemmer": { - "type": "stemmer", - "language": "light_spanish" - } - }, - "analyzer": { - "rebuilt_spanish": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "spanish_stop", - "spanish_keywords", - "spanish_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"spanish_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: spanish_example, first: spanish, second: rebuilt_spanish}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. 
- -[[swedish-analyzer]] -===== `swedish` analyzer - -The `swedish` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /swedish_example -{ - "settings": { - "analysis": { - "filter": { - "swedish_stop": { - "type": "stop", - "stopwords": "_swedish_" <1> - }, - "swedish_keywords": { - "type": "keyword_marker", - "keywords": ["exempel"] <2> - }, - "swedish_stemmer": { - "type": "stemmer", - "language": "swedish" - } - }, - "analyzer": { - "rebuilt_swedish": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "swedish_stop", - "swedish_keywords", - "swedish_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"swedish_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: swedish_example, first: swedish, second: rebuilt_swedish}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[turkish-analyzer]] -===== `turkish` analyzer - -The `turkish` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /turkish_example -{ - "settings": { - "analysis": { - "filter": { - "turkish_stop": { - "type": "stop", - "stopwords": "_turkish_" <1> - }, - "turkish_lowercase": { - "type": "lowercase", - "language": "turkish" - }, - "turkish_keywords": { - "type": "keyword_marker", - "keywords": ["örnek"] <2> - }, - "turkish_stemmer": { - "type": "stemmer", - "language": "turkish" - } - }, - "analyzer": { - "rebuilt_turkish": { - "tokenizer": "standard", - "filter": [ - "apostrophe", - "turkish_lowercase", - "turkish_stop", - "turkish_keywords", - "turkish_stemmer" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"turkish_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: turkish_example, first: turkish, second: rebuilt_turkish}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> This filter should be removed unless there are words which should - be excluded from stemming. - -[[thai-analyzer]] -===== `thai` analyzer - -The `thai` analyzer could be reimplemented as a `custom` analyzer as follows: - -[source,console] ----------------------------------------------------- -PUT /thai_example -{ - "settings": { - "analysis": { - "filter": { - "thai_stop": { - "type": "stop", - "stopwords": "_thai_" <1> - } - }, - "analyzer": { - "rebuilt_thai": { - "tokenizer": "thai", - "filter": [ - "lowercase", - "decimal_digit", - "thai_stop" - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/"thai_keywords",//] -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: thai_example, first: thai, second: rebuilt_thai}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. 
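-
-The rebuilt analyzers above only take effect once they are referenced from a
-field mapping or a query. As a minimal sketch (the index name and field name
-here are hypothetical, and the analyzer definition simply repeats the
-`english` example without the keyword marker filter), a rebuilt analyzer can
-be wired to a `text` field in the same index-creation request:
-
-[source,console]
-----------------------------------------------------
-PUT /my-articles
-{
-  "settings": {
-    "analysis": {
-      "filter": {
-        "english_stop": {
-          "type": "stop",
-          "stopwords": "_english_"
-        },
-        "english_stemmer": {
-          "type": "stemmer",
-          "language": "english"
-        },
-        "english_possessive_stemmer": {
-          "type": "stemmer",
-          "language": "possessive_english"
-        }
-      },
-      "analyzer": {
-        "rebuilt_english": {
-          "tokenizer": "standard",
-          "filter": [
-            "english_possessive_stemmer",
-            "lowercase",
-            "english_stop",
-            "english_stemmer"
-          ]
-        }
-      }
-    }
-  },
-  "mappings": {
-    "properties": {
-      "title": {
-        "type": "text",
-        "analyzer": "rebuilt_english"   <1>
-      }
-    }
-  }
-}
-----------------------------------------------------
-
-<1> The custom analyzer is referenced by name; it must be defined in the
-    settings of the same index that declares the mapping.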
diff --git a/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc b/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc deleted file mode 100644 index e685c17f96865..0000000000000 --- a/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc +++ /dev/null @@ -1,411 +0,0 @@ -[[analysis-pattern-analyzer]] -=== Pattern analyzer -++++ -Pattern -++++ - -The `pattern` analyzer uses a regular expression to split the text into terms. -The regular expression should match the *token separators* not the tokens -themselves. The regular expression defaults to `\W+` (or all non-word characters). - -[WARNING] -.Beware of Pathological Regular Expressions -======================================== - -The pattern analyzer uses -https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html[Java Regular Expressions]. - -A badly written regular expression could run very slowly or even throw a -StackOverflowError and cause the node it is running on to exit suddenly. - -Read more about https://www.regular-expressions.info/catastrophic.html[pathological regular expressions and how to avoid them]. - -======================================== - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "analyzer": "pattern", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." -} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "the", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "2", - "start_offset": 4, - "end_offset": 5, - "type": "word", - "position": 1 - }, - { - "token": "quick", - "start_offset": 6, - "end_offset": 11, - "type": "word", - "position": 2 - }, - { - "token": "brown", - "start_offset": 12, - "end_offset": 17, - "type": "word", - "position": 3 - }, - { - "token": "foxes", - "start_offset": 18, - "end_offset": 23, - "type": "word", - "position": 4 - }, - { - "token": "jumped", - "start_offset": 24, - "end_offset": 30, - "type": "word", - "position": 5 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "word", - "position": 6 - }, - { - "token": "the", - "start_offset": 36, - "end_offset": 39, - "type": "word", - "position": 7 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "word", - "position": 8 - }, - { - "token": "dog", - "start_offset": 45, - "end_offset": 48, - "type": "word", - "position": 9 - }, - { - "token": "s", - "start_offset": 49, - "end_offset": 50, - "type": "word", - "position": 10 - }, - { - "token": "bone", - "start_offset": 51, - "end_offset": 55, - "type": "word", - "position": 11 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ the, 2, quick, brown, foxes, jumped, over, the, lazy, dog, s, bone ] ---------------------------- - -[discrete] -=== Configuration - -The `pattern` analyzer accepts the following parameters: - -[horizontal] -`pattern`:: - - A https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html[Java regular expression], defaults to `\W+`. - -`flags`:: - - Java regular expression https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html#field.summary[flags]. - Flags should be pipe-separated, eg `"CASE_INSENSITIVE|COMMENTS"`. - -`lowercase`:: - - Should terms be lowercased or not. Defaults to `true`. 
- -`stopwords`:: - - A pre-defined stop words list like `_english_` or an array containing a - list of stop words. Defaults to `_none_`. - -`stopwords_path`:: - - The path to a file containing stop words. - -See the <> for more information -about stop word configuration. - - -[discrete] -=== Example configuration - -In this example, we configure the `pattern` analyzer to split email addresses -on non-word characters or on underscores (`\W|_`), and to lower-case the result: - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_email_analyzer": { - "type": "pattern", - "pattern": "\\W|_", <1> - "lowercase": true - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_email_analyzer", - "text": "John_Smith@foo-bar.com" -} ----------------------------- - -<1> The backslashes in the pattern need to be escaped when specifying the - pattern as a JSON string. - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "john", - "start_offset": 0, - "end_offset": 4, - "type": "word", - "position": 0 - }, - { - "token": "smith", - "start_offset": 5, - "end_offset": 10, - "type": "word", - "position": 1 - }, - { - "token": "foo", - "start_offset": 11, - "end_offset": 14, - "type": "word", - "position": 2 - }, - { - "token": "bar", - "start_offset": 15, - "end_offset": 18, - "type": "word", - "position": 3 - }, - { - "token": "com", - "start_offset": 19, - "end_offset": 22, - "type": "word", - "position": 4 - } - ] -} ----------------------------- - -///////////////////// - - -The above example produces the following terms: - -[source,text] ---------------------------- -[ john, smith, foo, bar, com ] ---------------------------- - -[discrete] -==== CamelCase tokenizer - -The following more complicated example splits CamelCase text into tokens: - -[source,console] --------------------------------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "camel": { - "type": "pattern", - "pattern": "([^\\p{L}\\d]+)|(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)|(?<=[\\p{L}&&[^\\p{Lu}]])(?=\\p{Lu})|(?<=\\p{Lu})(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])" - } - } - } - } -} - -GET my-index-000001/_analyze -{ - "analyzer": "camel", - "text": "MooseX::FTPClass2_beta" -} --------------------------------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "moose", - "start_offset": 0, - "end_offset": 5, - "type": "word", - "position": 0 - }, - { - "token": "x", - "start_offset": 5, - "end_offset": 6, - "type": "word", - "position": 1 - }, - { - "token": "ftp", - "start_offset": 8, - "end_offset": 11, - "type": "word", - "position": 2 - }, - { - "token": "class", - "start_offset": 11, - "end_offset": 16, - "type": "word", - "position": 3 - }, - { - "token": "2", - "start_offset": 16, - "end_offset": 17, - "type": "word", - "position": 4 - }, - { - "token": "beta", - "start_offset": 18, - "end_offset": 22, - "type": "word", - "position": 5 - } - ] -} ----------------------------- - -///////////////////// - - -The above example produces the following terms: - -[source,text] ---------------------------- -[ moose, x, ftp, class, 2, beta ] ---------------------------- - -The regex above is easier to understand as: - -[source,regex] --------------------------------------------------- - ([^\p{L}\d]+) # swallow non letters and numbers, -| (?<=\D)(?=\d) # or non-number followed by number, 
-| (?<=\d)(?=\D) # or number followed by non-number, -| (?<=[ \p{L} && [^\p{Lu}]]) # or lower case - (?=\p{Lu}) # followed by upper case, -| (?<=\p{Lu}) # or upper case - (?=\p{Lu} # followed by upper case - [\p{L}&&[^\p{Lu}]] # then lower case - ) --------------------------------------------------- - -[discrete] -=== Definition - -The `pattern` analyzer consists of: - -Tokenizer:: -* <> - -Token Filters:: -* <> -* <> (disabled by default) - -If you need to customize the `pattern` analyzer beyond the configuration -parameters then you need to recreate it as a `custom` analyzer and modify -it, usually by adding token filters. This would recreate the built-in -`pattern` analyzer and you can use it as a starting point for further -customization: - -[source,console] ----------------------------------------------------- -PUT /pattern_example -{ - "settings": { - "analysis": { - "tokenizer": { - "split_on_non_word": { - "type": "pattern", - "pattern": "\\W+" <1> - } - }, - "analyzer": { - "rebuilt_pattern": { - "tokenizer": "split_on_non_word", - "filter": [ - "lowercase" <2> - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: pattern_example, first: pattern, second: rebuilt_pattern}\nendyaml\n/] -<1> The default pattern is `\W+` which splits on non-word characters -and this is where you'd change it. -<2> You'd add other token filters after `lowercase`. diff --git a/docs/reference/analysis/analyzers/simple-analyzer.asciidoc b/docs/reference/analysis/analyzers/simple-analyzer.asciidoc deleted file mode 100644 index e67e1a06f04ac..0000000000000 --- a/docs/reference/analysis/analyzers/simple-analyzer.asciidoc +++ /dev/null @@ -1,150 +0,0 @@ -[[analysis-simple-analyzer]] -=== Simple analyzer -++++ -Simple -++++ - -The `simple` analyzer breaks text into tokens at any non-letter character, such -as numbers, spaces, hyphens and apostrophes, discards non-letter characters, -and changes uppercase to lowercase. - -[[analysis-simple-analyzer-ex]] -==== Example - -[source,console] ----- -POST _analyze -{ - "analyzer": "simple", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." 
-} ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "the", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "quick", - "start_offset": 6, - "end_offset": 11, - "type": "word", - "position": 1 - }, - { - "token": "brown", - "start_offset": 12, - "end_offset": 17, - "type": "word", - "position": 2 - }, - { - "token": "foxes", - "start_offset": 18, - "end_offset": 23, - "type": "word", - "position": 3 - }, - { - "token": "jumped", - "start_offset": 24, - "end_offset": 30, - "type": "word", - "position": 4 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "word", - "position": 5 - }, - { - "token": "the", - "start_offset": 36, - "end_offset": 39, - "type": "word", - "position": 6 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "word", - "position": 7 - }, - { - "token": "dog", - "start_offset": 45, - "end_offset": 48, - "type": "word", - "position": 8 - }, - { - "token": "s", - "start_offset": 49, - "end_offset": 50, - "type": "word", - "position": 9 - }, - { - "token": "bone", - "start_offset": 51, - "end_offset": 55, - "type": "word", - "position": 10 - } - ] -} ----- -//// - -The `simple` analyzer parses the sentence and produces the following -tokens: - -[source,text] ----- -[ the, quick, brown, foxes, jumped, over, the, lazy, dog, s, bone ] ----- - -[[analysis-simple-analyzer-definition]] -==== Definition - -The `simple` analyzer is defined by one tokenizer: - -Tokenizer:: -* <> - -[[analysis-simple-analyzer-customize]] -==== Customize - -To customize the `simple` analyzer, duplicate it to create the basis for -a custom analyzer. This custom analyzer can be modified as required, usually by -adding token filters. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_custom_simple_analyzer": { - "tokenizer": "lowercase", - "filter": [ <1> - ] - } - } - } - } -} ----- -<1> Add token filters here. diff --git a/docs/reference/analysis/analyzers/standard-analyzer.asciidoc b/docs/reference/analysis/analyzers/standard-analyzer.asciidoc deleted file mode 100644 index ea079b8718181..0000000000000 --- a/docs/reference/analysis/analyzers/standard-analyzer.asciidoc +++ /dev/null @@ -1,302 +0,0 @@ -[[analysis-standard-analyzer]] -=== Standard analyzer -++++ -Standard -++++ - -The `standard` analyzer is the default analyzer which is used if none is -specified. It provides grammar based tokenization (based on the Unicode Text -Segmentation algorithm, as specified in -https://unicode.org/reports/tr29/[Unicode Standard Annex #29]) and works well -for most languages. - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "analyzer": "standard", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." 
-} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "the", - "start_offset": 0, - "end_offset": 3, - "type": "", - "position": 0 - }, - { - "token": "2", - "start_offset": 4, - "end_offset": 5, - "type": "", - "position": 1 - }, - { - "token": "quick", - "start_offset": 6, - "end_offset": 11, - "type": "", - "position": 2 - }, - { - "token": "brown", - "start_offset": 12, - "end_offset": 17, - "type": "", - "position": 3 - }, - { - "token": "foxes", - "start_offset": 18, - "end_offset": 23, - "type": "", - "position": 4 - }, - { - "token": "jumped", - "start_offset": 24, - "end_offset": 30, - "type": "", - "position": 5 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "", - "position": 6 - }, - { - "token": "the", - "start_offset": 36, - "end_offset": 39, - "type": "", - "position": 7 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "", - "position": 8 - }, - { - "token": "dog's", - "start_offset": 45, - "end_offset": 50, - "type": "", - "position": 9 - }, - { - "token": "bone", - "start_offset": 51, - "end_offset": 55, - "type": "", - "position": 10 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ the, 2, quick, brown, foxes, jumped, over, the, lazy, dog's, bone ] ---------------------------- - -[discrete] -=== Configuration - -The `standard` analyzer accepts the following parameters: - -[horizontal] -`max_token_length`:: - - The maximum token length. If a token is seen that exceeds this length then - it is split at `max_token_length` intervals. Defaults to `255`. - -`stopwords`:: - - A pre-defined stop words list like `_english_` or an array containing a - list of stop words. Defaults to `_none_`. - -`stopwords_path`:: - - The path to a file containing stop words. - -See the <> for more information -about stop word configuration. - - -[discrete] -=== Example configuration - -In this example, we configure the `standard` analyzer to have a -`max_token_length` of 5 (for demonstration purposes), and to use the -pre-defined list of English stop words: - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_english_analyzer": { - "type": "standard", - "max_token_length": 5, - "stopwords": "_english_" - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_english_analyzer", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." 
-} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "2", - "start_offset": 4, - "end_offset": 5, - "type": "", - "position": 1 - }, - { - "token": "quick", - "start_offset": 6, - "end_offset": 11, - "type": "", - "position": 2 - }, - { - "token": "brown", - "start_offset": 12, - "end_offset": 17, - "type": "", - "position": 3 - }, - { - "token": "foxes", - "start_offset": 18, - "end_offset": 23, - "type": "", - "position": 4 - }, - { - "token": "jumpe", - "start_offset": 24, - "end_offset": 29, - "type": "", - "position": 5 - }, - { - "token": "d", - "start_offset": 29, - "end_offset": 30, - "type": "", - "position": 6 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "", - "position": 7 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "", - "position": 9 - }, - { - "token": "dog's", - "start_offset": 45, - "end_offset": 50, - "type": "", - "position": 10 - }, - { - "token": "bone", - "start_offset": 51, - "end_offset": 55, - "type": "", - "position": 11 - } - ] -} ----------------------------- - -///////////////////// - -The above example produces the following terms: - -[source,text] ---------------------------- -[ 2, quick, brown, foxes, jumpe, d, over, lazy, dog's, bone ] ---------------------------- - -[discrete] -=== Definition - -The `standard` analyzer consists of: - -Tokenizer:: -* <> - -Token Filters:: -* <> -* <> (disabled by default) - -If you need to customize the `standard` analyzer beyond the configuration -parameters then you need to recreate it as a `custom` analyzer and modify -it, usually by adding token filters. This would recreate the built-in -`standard` analyzer and you can use it as a starting point: - -[source,console] ----------------------------------------------------- -PUT /standard_example -{ - "settings": { - "analysis": { - "analyzer": { - "rebuilt_standard": { - "tokenizer": "standard", - "filter": [ - "lowercase" <1> - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: standard_example, first: standard, second: rebuilt_standard}\nendyaml\n/] -<1> You'd add any token filters after `lowercase`. diff --git a/docs/reference/analysis/analyzers/stop-analyzer.asciidoc b/docs/reference/analysis/analyzers/stop-analyzer.asciidoc deleted file mode 100644 index 0a156cca1add6..0000000000000 --- a/docs/reference/analysis/analyzers/stop-analyzer.asciidoc +++ /dev/null @@ -1,276 +0,0 @@ -[[analysis-stop-analyzer]] -=== Stop analyzer -++++ -Stop -++++ - -The `stop` analyzer is the same as the <> -but adds support for removing stop words. It defaults to using the -`_english_` stop words. - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "analyzer": "stop", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." 
-} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "quick", - "start_offset": 6, - "end_offset": 11, - "type": "word", - "position": 1 - }, - { - "token": "brown", - "start_offset": 12, - "end_offset": 17, - "type": "word", - "position": 2 - }, - { - "token": "foxes", - "start_offset": 18, - "end_offset": 23, - "type": "word", - "position": 3 - }, - { - "token": "jumped", - "start_offset": 24, - "end_offset": 30, - "type": "word", - "position": 4 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "word", - "position": 5 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "word", - "position": 7 - }, - { - "token": "dog", - "start_offset": 45, - "end_offset": 48, - "type": "word", - "position": 8 - }, - { - "token": "s", - "start_offset": 49, - "end_offset": 50, - "type": "word", - "position": 9 - }, - { - "token": "bone", - "start_offset": 51, - "end_offset": 55, - "type": "word", - "position": 10 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ quick, brown, foxes, jumped, over, lazy, dog, s, bone ] ---------------------------- - -[discrete] -=== Configuration - -The `stop` analyzer accepts the following parameters: - -[horizontal] -`stopwords`:: - - A pre-defined stop words list like `_english_` or an array containing a - list of stop words. Defaults to `_english_`. - -`stopwords_path`:: - - The path to a file containing stop words. This path is relative to the - Elasticsearch `config` directory. - - -See the <> for more information -about stop word configuration. - -[discrete] -=== Example configuration - -In this example, we configure the `stop` analyzer to use a specified list of -words as stop words: - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_stop_analyzer": { - "type": "stop", - "stopwords": ["the", "over"] - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_stop_analyzer", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." 
-} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "quick", - "start_offset": 6, - "end_offset": 11, - "type": "word", - "position": 1 - }, - { - "token": "brown", - "start_offset": 12, - "end_offset": 17, - "type": "word", - "position": 2 - }, - { - "token": "foxes", - "start_offset": 18, - "end_offset": 23, - "type": "word", - "position": 3 - }, - { - "token": "jumped", - "start_offset": 24, - "end_offset": 30, - "type": "word", - "position": 4 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "word", - "position": 7 - }, - { - "token": "dog", - "start_offset": 45, - "end_offset": 48, - "type": "word", - "position": 8 - }, - { - "token": "s", - "start_offset": 49, - "end_offset": 50, - "type": "word", - "position": 9 - }, - { - "token": "bone", - "start_offset": 51, - "end_offset": 55, - "type": "word", - "position": 10 - } - ] -} ----------------------------- - -///////////////////// - - -The above example produces the following terms: - -[source,text] ---------------------------- -[ quick, brown, foxes, jumped, lazy, dog, s, bone ] ---------------------------- - -[discrete] -=== Definition - -It consists of: - -Tokenizer:: -* <> - -Token filters:: -* <> - -If you need to customize the `stop` analyzer beyond the configuration -parameters then you need to recreate it as a `custom` analyzer and modify -it, usually by adding token filters. This would recreate the built-in -`stop` analyzer and you can use it as a starting point for further -customization: - -[source,console] ----------------------------------------------------- -PUT /stop_example -{ - "settings": { - "analysis": { - "filter": { - "english_stop": { - "type": "stop", - "stopwords": "_english_" <1> - } - }, - "analyzer": { - "rebuilt_stop": { - "tokenizer": "lowercase", - "filter": [ - "english_stop" <2> - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: stop_example, first: stop, second: rebuilt_stop}\nendyaml\n/] - -<1> The default stopwords can be overridden with the `stopwords` - or `stopwords_path` parameters. -<2> You'd add any token filters after `english_stop`. diff --git a/docs/reference/analysis/analyzers/whitespace-analyzer.asciidoc b/docs/reference/analysis/analyzers/whitespace-analyzer.asciidoc deleted file mode 100644 index 3af4f140b5868..0000000000000 --- a/docs/reference/analysis/analyzers/whitespace-analyzer.asciidoc +++ /dev/null @@ -1,149 +0,0 @@ -[[analysis-whitespace-analyzer]] -=== Whitespace analyzer -++++ -Whitespace -++++ - -The `whitespace` analyzer breaks text into terms whenever it encounters a -whitespace character. - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "analyzer": "whitespace", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." 
-} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "The", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "2", - "start_offset": 4, - "end_offset": 5, - "type": "word", - "position": 1 - }, - { - "token": "QUICK", - "start_offset": 6, - "end_offset": 11, - "type": "word", - "position": 2 - }, - { - "token": "Brown-Foxes", - "start_offset": 12, - "end_offset": 23, - "type": "word", - "position": 3 - }, - { - "token": "jumped", - "start_offset": 24, - "end_offset": 30, - "type": "word", - "position": 4 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "word", - "position": 5 - }, - { - "token": "the", - "start_offset": 36, - "end_offset": 39, - "type": "word", - "position": 6 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "word", - "position": 7 - }, - { - "token": "dog's", - "start_offset": 45, - "end_offset": 50, - "type": "word", - "position": 8 - }, - { - "token": "bone.", - "start_offset": 51, - "end_offset": 56, - "type": "word", - "position": 9 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ The, 2, QUICK, Brown-Foxes, jumped, over, the, lazy, dog's, bone. ] ---------------------------- - -[discrete] -=== Configuration - -The `whitespace` analyzer is not configurable. - -[discrete] -=== Definition - -It consists of: - -Tokenizer:: -* <> - -If you need to customize the `whitespace` analyzer then you need to -recreate it as a `custom` analyzer and modify it, usually by adding -token filters. This would recreate the built-in `whitespace` analyzer -and you can use it as a starting point for further customization: - -[source,console] ----------------------------------------------------- -PUT /whitespace_example -{ - "settings": { - "analysis": { - "analyzer": { - "rebuilt_whitespace": { - "tokenizer": "whitespace", - "filter": [ <1> - ] - } - } - } - } -} ----------------------------------------------------- -// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: whitespace_example, first: whitespace, second: rebuilt_whitespace}\nendyaml\n/] - -<1> You'd add any token filters here. diff --git a/docs/reference/analysis/anatomy.asciidoc b/docs/reference/analysis/anatomy.asciidoc deleted file mode 100644 index f01a22ec4e6ee..0000000000000 --- a/docs/reference/analysis/anatomy.asciidoc +++ /dev/null @@ -1,55 +0,0 @@ -[[analyzer-anatomy]] -=== Anatomy of an analyzer - -An _analyzer_ -- whether built-in or custom -- is just a package which -contains three lower-level building blocks: _character filters_, -_tokenizers_, and _token filters_. - -The built-in <> pre-package these building -blocks into analyzers suitable for different languages and types of text. -Elasticsearch also exposes the individual building blocks so that they can be -combined to define new <> analyzers. - -[[analyzer-anatomy-character-filters]] -==== Character filters - -A _character filter_ receives the original text as a stream of characters and -can transform the stream by adding, removing, or changing characters. For -instance, a character filter could be used to convert Hindu-Arabic numerals -(٠‎١٢٣٤٥٦٧٨‎٩‎) into their Arabic-Latin equivalents (0123456789), or to strip HTML -elements like `` from the stream. - -An analyzer may have *zero or more* <>, -which are applied in order. 
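
To make "applied in order" concrete, here is a minimal sketch (an illustrative addition, not part of the original reference) that chains two character filters in a single `_analyze` request: the built-in `html_strip` filter runs first and decodes `&amp;` to `&`, and an inline `mapping` filter then rewrites that `&`. The mapping rule itself is only an assumption for the example.

[source,console]
----
GET /_analyze
{
  "tokenizer": "keyword",
  "char_filter": [
    "html_strip",                      <1>
    {
      "type": "mapping",               <2>
      "mappings": [ "& => and" ]
    }
  ],
  "text": "<b>fish &amp; chips</b>"
}
----
<1> Applied first: strips the `<b>` tags and decodes the `&amp;` entity to `&`.
<2> Applied second: sees the decoded `&` and replaces it with `and`, so the tokenizer receives `fish and chips`.

Swapping the two filters would change the result, because the `mapping` rule would run before the entity had been decoded.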
- -[[analyzer-anatomy-tokenizer]] -==== Tokenizer - -A _tokenizer_ receives a stream of characters, breaks it up into individual -_tokens_ (usually individual words), and outputs a stream of _tokens_. For -instance, a <> tokenizer breaks -text into tokens whenever it sees any whitespace. It would convert the text -`"Quick brown fox!"` into the terms `[Quick, brown, fox!]`. - -The tokenizer is also responsible for recording the order or _position_ of -each term and the start and end _character offsets_ of the original word which -the term represents. - -An analyzer must have *exactly one* <>. - -[[analyzer-anatomy-token-filters]] -==== Token filters - -A _token filter_ receives the token stream and may add, remove, or change -tokens. For example, a <> token -filter converts all tokens to lowercase, a -<> token filter removes common words -(_stop words_) like `the` from the token stream, and a -<> token filter introduces synonyms -into the token stream. - -Token filters are not allowed to change the position or character offsets of -each token. - -An analyzer may have *zero or more* <>, -which are applied in order. \ No newline at end of file diff --git a/docs/reference/analysis/charfilters.asciidoc b/docs/reference/analysis/charfilters.asciidoc deleted file mode 100644 index 93054cf8e618b..0000000000000 --- a/docs/reference/analysis/charfilters.asciidoc +++ /dev/null @@ -1,36 +0,0 @@ -[[analysis-charfilters]] -== Character filters reference - -_Character filters_ are used to preprocess the stream of characters before it -is passed to the <>. - -A character filter receives the original text as a stream of characters and -can transform the stream by adding, removing, or changing characters. For -instance, a character filter could be used to convert Hindu-Arabic numerals -(٠‎١٢٣٤٥٦٧٨‎٩‎) into their Arabic-Latin equivalents (0123456789), or to strip HTML -elements like `` from the stream. - - -Elasticsearch has a number of built in character filters which can be used to build -<>. - -<>:: - -The `html_strip` character filter strips out HTML elements like `` and -decodes HTML entities like `&`. - -<>:: - -The `mapping` character filter replaces any occurrences of the specified -strings with the specified replacements. - -<>:: - -The `pattern_replace` character filter replaces any characters matching a -regular expression with the specified replacement. - -include::charfilters/htmlstrip-charfilter.asciidoc[] - -include::charfilters/mapping-charfilter.asciidoc[] - -include::charfilters/pattern-replace-charfilter.asciidoc[] diff --git a/docs/reference/analysis/charfilters/htmlstrip-charfilter.asciidoc b/docs/reference/analysis/charfilters/htmlstrip-charfilter.asciidoc deleted file mode 100644 index 237339d9744d9..0000000000000 --- a/docs/reference/analysis/charfilters/htmlstrip-charfilter.asciidoc +++ /dev/null @@ -1,130 +0,0 @@ -[[analysis-htmlstrip-charfilter]] -=== HTML strip character filter -++++ -HTML strip -++++ - -Strips HTML elements from a text and replaces HTML entities with their decoded -value (e.g, replaces `&` with `&`). - -The `html_strip` filter uses Lucene's -{lucene-analysis-docs}/charfilter/HTMLStripCharFilter.html[HTMLStripCharFilter]. - -[[analysis-htmlstrip-charfilter-analyze-ex]] -==== Example - -The following <> request uses the -`html_strip` filter to change the text `
<p>I&apos;m so <b>happy</b>!</p>
` to -`\nI'm so happy!\n`. - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "keyword", - "char_filter": [ - "html_strip" - ], - "text": "
<p>I&apos;m so <b>happy</b>!</p>
" -} ----- - -The filter produces the following text: - -[source,text] ----- -[ \nI'm so happy!\n ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "\nI'm so happy!\n", - "start_offset": 0, - "end_offset": 32, - "type": "word", - "position": 0 - } - ] -} ----- -//// - -[[analysis-htmlstrip-charfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`html_strip` filter to configure a new -<>. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "keyword", - "char_filter": [ - "html_strip" - ] - } - } - } - } -} ----- - -[[analysis-htmlstrip-charfilter-configure-parms]] -==== Configurable parameters - -`escaped_tags`:: -(Optional, array of strings) -Array of HTML elements without enclosing angle brackets (`< >`). The filter -skips these HTML elements when stripping HTML from the text. For example, a -value of `[ "p" ]` skips the `
<p>
` HTML element. - -[[analysis-htmlstrip-charfilter-customize]] -==== Customize - -To customize the `html_strip` filter, duplicate it to create the basis for a new -custom character filter. You can modify the filter using its configurable -parameters. - -The following <> request -configures a new <> using a custom -`html_strip` filter, `my_custom_html_strip_char_filter`. - -The `my_custom_html_strip_char_filter` filter skips the removal of the `` -HTML element. - -[source,console] ----- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "keyword", - "char_filter": [ - "my_custom_html_strip_char_filter" - ] - } - }, - "char_filter": { - "my_custom_html_strip_char_filter": { - "type": "html_strip", - "escaped_tags": [ - "b" - ] - } - } - } - } -} ----- diff --git a/docs/reference/analysis/charfilters/mapping-charfilter.asciidoc b/docs/reference/analysis/charfilters/mapping-charfilter.asciidoc deleted file mode 100644 index ecb73164e6a91..0000000000000 --- a/docs/reference/analysis/charfilters/mapping-charfilter.asciidoc +++ /dev/null @@ -1,173 +0,0 @@ -[[analysis-mapping-charfilter]] -=== Mapping character filter -++++ -Mapping -++++ - -The `mapping` character filter accepts a map of keys and values. Whenever it -encounters a string of characters that is the same as a key, it replaces them -with the value associated with that key. - -Matching is greedy; the longest pattern matching at a given point wins. -Replacements are allowed to be the empty string. - -The `mapping` filter uses Lucene's -{lucene-analysis-docs}/charfilter/MappingCharFilter.html[MappingCharFilter]. - -[[analysis-mapping-charfilter-analyze-ex]] -==== Example - -The following <> request uses the `mapping` filter -to convert Hindu-Arabic numerals (٠‎١٢٣٤٥٦٧٨‎٩‎) into their Arabic-Latin -equivalents (0123456789), changing the text `My license plate is ٢٥٠١٥` to -`My license plate is 25015`. - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "keyword", - "char_filter": [ - { - "type": "mapping", - "mappings": [ - "٠ => 0", - "١ => 1", - "٢ => 2", - "٣ => 3", - "٤ => 4", - "٥ => 5", - "٦ => 6", - "٧ => 7", - "٨ => 8", - "٩ => 9" - ] - } - ], - "text": "My license plate is ٢٥٠١٥" -} ----- - -The filter produces the following text: - -[source,text] ----- -[ My license plate is 25015 ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "My license plate is 25015", - "start_offset": 0, - "end_offset": 25, - "type": "word", - "position": 0 - } - ] -} ----- -//// - -[[analysis-mapping-charfilter-configure-parms]] -==== Configurable parameters - -`mappings`:: -(Required*, array of strings) -Array of mappings, with each element having the form `key => value`. -+ -Either this or the `mappings_path` parameter must be specified. - -`mappings_path`:: -(Required*, string) -Path to a file containing `key => value` mappings. -+ -This path must be absolute or relative to the `config` location, and the file -must be UTF-8 encoded. Each mapping in the file must be separated by a line -break. -+ -Either this or the `mappings` parameter must be specified. - -[[analysis-mapping-charfilter-customize]] -==== Customize and add to an analyzer - -To customize the `mappings` filter, duplicate it to create the basis for a new -custom character filter. You can modify the filter using its configurable -parameters. - -The following <> request -configures a new <> using a custom -`mappings` filter, `my_mappings_char_filter`. 
- -The `my_mappings_char_filter` filter replaces the `:)` and `:(` emoticons -with a text equivalent. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "standard", - "char_filter": [ - "my_mappings_char_filter" - ] - } - }, - "char_filter": { - "my_mappings_char_filter": { - "type": "mapping", - "mappings": [ - ":) => _happy_", - ":( => _sad_" - ] - } - } - } - } -} ----- - -The following <> request uses the custom -`my_mappings_char_filter` to replace `:(` with `_sad_` in -the text `I'm delighted about it :(`. - -[source,console] ----- -GET /my-index-000001/_analyze -{ - "tokenizer": "keyword", - "char_filter": [ "my_mappings_char_filter" ], - "text": "I'm delighted about it :(" -} ----- -// TEST[continued] - -The filter produces the following text: - -[source,text] ---------------------------- -[ I'm delighted about it _sad_ ] ---------------------------- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "I'm delighted about it _sad_", - "start_offset": 0, - "end_offset": 25, - "type": "word", - "position": 0 - } - ] -} ----- -//// diff --git a/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc b/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc deleted file mode 100644 index 4d82778861a95..0000000000000 --- a/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc +++ /dev/null @@ -1,266 +0,0 @@ -[[analysis-pattern-replace-charfilter]] -=== Pattern replace character filter -++++ -Pattern replace -++++ - -The `pattern_replace` character filter uses a regular expression to match -characters which should be replaced with the specified replacement string. -The replacement string can refer to capture groups in the regular expression. - -[WARNING] -.Beware of Pathological Regular Expressions -======================================== - -The pattern replace character filter uses -https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html[Java Regular Expressions]. - -A badly written regular expression could run very slowly or even throw a -StackOverflowError and cause the node it is running on to exit suddenly. - -Read more about https://www.regular-expressions.info/catastrophic.html[pathological regular expressions and how to avoid them]. - -======================================== - -[discrete] -=== Configuration - -The `pattern_replace` character filter accepts the following parameters: - -[horizontal] -`pattern`:: - - A https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html[Java regular expression]. Required. - -`replacement`:: - - The replacement string, which can reference capture groups using the - `$1`..`$9` syntax, as explained - https://docs.oracle.com/javase/8/docs/api/java/util/regex/Matcher.html#appendReplacement-java.lang.StringBuffer-java.lang.String-[here]. - -`flags`:: - - Java regular expression https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html#field.summary[flags]. - Flags should be pipe-separated, eg `"CASE_INSENSITIVE|COMMENTS"`. 
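
For instance (a minimal sketch; the index and filter names are illustrative assumptions), a `pattern_replace` filter can be compiled with the `CASE_INSENSITIVE` flag so that `dog`, `Dog` and `DOG` are all rewritten:

[source,console]
----
PUT my-index-000001
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "char_filter": [ "my_case_insensitive_filter" ]
        }
      },
      "char_filter": {
        "my_case_insensitive_filter": {
          "type": "pattern_replace",
          "pattern": "dog",
          "replacement": "canine",
          "flags": "CASE_INSENSITIVE"    <1>
        }
      }
    }
  }
}
----
<1> A single flag needs no pipe separator; multiple flags would be written as, for example, `"CASE_INSENSITIVE|COMMENTS"`.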
- -[discrete] -=== Example configuration - -In this example, we configure the `pattern_replace` character filter to -replace any embedded dashes in numbers with underscores, i.e `123-456-789` -> -`123_456_789`: - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "standard", - "char_filter": [ - "my_char_filter" - ] - } - }, - "char_filter": { - "my_char_filter": { - "type": "pattern_replace", - "pattern": "(\\d+)-(?=\\d)", - "replacement": "$1_" - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_analyzer", - "text": "My credit card is 123-456-789" -} ----------------------------- -// TEST[s/\$1//] -// the test framework doesn't like the $1 so we just throw it away rather than -// try to get it to work properly. At least we are still testing the charfilter. - -The above example produces the following terms: - -[source,text] ---------------------------- -[ My, credit, card, is, 123_456_789 ] ---------------------------- - -WARNING: Using a replacement string that changes the length of the original -text will work for search purposes, but will result in incorrect highlighting, -as can be seen in the following example. - -This example inserts a space whenever it encounters a lower-case letter -followed by an upper-case letter (i.e. `fooBarBaz` -> `foo Bar Baz`), allowing -camelCase words to be queried individually: - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "standard", - "char_filter": [ - "my_char_filter" - ], - "filter": [ - "lowercase" - ] - } - }, - "char_filter": { - "my_char_filter": { - "type": "pattern_replace", - "pattern": "(?<=\\p{Lower})(?=\\p{Upper})", - "replacement": " " - } - } - } - }, - "mappings": { - "properties": { - "text": { - "type": "text", - "analyzer": "my_analyzer" - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_analyzer", - "text": "The fooBarBaz method" -} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "the", - "start_offset": 0, - "end_offset": 3, - "type": "", - "position": 0 - }, - { - "token": "foo", - "start_offset": 4, - "end_offset": 6, - "type": "", - "position": 1 - }, - { - "token": "bar", - "start_offset": 7, - "end_offset": 9, - "type": "", - "position": 2 - }, - { - "token": "baz", - "start_offset": 10, - "end_offset": 13, - "type": "", - "position": 3 - }, - { - "token": "method", - "start_offset": 14, - "end_offset": 20, - "type": "", - "position": 4 - } - ] -} ----------------------------- - -///////////////////// - -The above returns the following terms: - -[source,text] ----------------------------- -[ the, foo, bar, baz, method ] ----------------------------- - -Querying for `bar` will find the document correctly, but highlighting on the -result will produce incorrect highlights, because our character filter changed -the length of the original text: - -[source,console] ----------------------------- -PUT my-index-000001/_doc/1?refresh -{ - "text": "The fooBarBaz method" -} - -GET my-index-000001/_search -{ - "query": { - "match": { - "text": "bar" - } - }, - "highlight": { - "fields": { - "text": {} - } - } -} ----------------------------- -// TEST[continued] - -The output from the above is: - -[source,console-result] ----------------------------- -{ - "timed_out": false, - "took": $body.took, - 
"_shards": { - "total": 1, - "successful": 1, - "skipped" : 0, - "failed": 0 - }, - "hits": { - "total" : { - "value": 1, - "relation": "eq" - }, - "max_score": 0.2876821, - "hits": [ - { - "_index": "my-index-000001", - "_id": "1", - "_score": 0.2876821, - "_source": { - "text": "The fooBarBaz method" - }, - "highlight": { - "text": [ - "The fooBarBaz method" <1> - ] - } - } - ] - } -} ----------------------------- -// TESTRESPONSE[s/"took".*/"took": "$body.took",/] - -<1> Note the incorrect highlight. diff --git a/docs/reference/analysis/concepts.asciidoc b/docs/reference/analysis/concepts.asciidoc deleted file mode 100644 index 9ff605227b866..0000000000000 --- a/docs/reference/analysis/concepts.asciidoc +++ /dev/null @@ -1,17 +0,0 @@ -[[analysis-concepts]] -== Text analysis concepts -++++ -Concepts -++++ - -This section explains the fundamental concepts of text analysis in {es}. - -* <> -* <> -* <> -* <> - -include::anatomy.asciidoc[] -include::index-search-time.asciidoc[] -include::stemming.asciidoc[] -include::token-graphs.asciidoc[] \ No newline at end of file diff --git a/docs/reference/analysis/configure-text-analysis.asciidoc b/docs/reference/analysis/configure-text-analysis.asciidoc deleted file mode 100644 index ddafc257e94e9..0000000000000 --- a/docs/reference/analysis/configure-text-analysis.asciidoc +++ /dev/null @@ -1,32 +0,0 @@ -[[configure-text-analysis]] -== Configure text analysis - -By default, {es} uses the <> for -all text analysis. The `standard` analyzer gives you out-of-the-box support for -most natural languages and use cases. If you chose to use the `standard` -analyzer as-is, no further configuration is needed. - -If the standard analyzer does not fit your needs, review and test {es}'s other -built-in <>. Built-in analyzers don't -require configuration, but some support options that can be used to adjust their -behavior. For example, you can configure the `standard` analyzer with a list of -custom stop words to remove. - -If no built-in analyzer fits your needs, you can test and create a custom -analyzer. Custom analyzers involve selecting and combining different -<>, giving you greater control over -the process. - -* <> -* <> -* <> -* <> - - -include::testing.asciidoc[] - -include::analyzers/configuring.asciidoc[] - -include::analyzers/custom-analyzer.asciidoc[] - -include::specify-analyzer.asciidoc[] \ No newline at end of file diff --git a/docs/reference/analysis/index-search-time.asciidoc b/docs/reference/analysis/index-search-time.asciidoc deleted file mode 100644 index 70b49c8e639ba..0000000000000 --- a/docs/reference/analysis/index-search-time.asciidoc +++ /dev/null @@ -1,175 +0,0 @@ -[[analysis-index-search-time]] -=== Index and search analysis - -Text analysis occurs at two times: - -Index time:: -When a document is indexed, any <> field values are analyzed. - -Search time:: -When running a <> on a `text` field, -the query string (the text the user is searching for) is analyzed. -+ -Search time is also called _query time_. - -The analyzer, or set of analysis rules, used at each time is called the _index -analyzer_ or _search analyzer_ respectively. - -[[analysis-same-index-search-analyzer]] -==== How the index and search analyzer work together - -In most cases, the same analyzer should be used at index and search time. This -ensures the values and query strings for a field are changed into the same form -of tokens. In turn, this ensures the tokens match as expected during a search. 
- -.**Example** -[%collapsible] -==== - -A document is indexed with the following value in a `text` field: - -[source,text] ------- -The QUICK brown foxes jumped over the dog! ------- - -The index analyzer for the field converts the value into tokens and normalizes -them. In this case, each of the tokens represents a word: - -[source,text] ------- -[ quick, brown, fox, jump, over, dog ] ------- - -These tokens are then indexed. - -Later, a user searches the same `text` field for: - -[source,text] ------- -"Quick fox" ------- - -The user expects this search to match the sentence indexed earlier, -`The QUICK brown foxes jumped over the dog!`. - -However, the query string does not contain the exact words used in the -document's original text: - -* `Quick` vs `QUICK` -* `fox` vs `foxes` - -To account for this, the query string is analyzed using the same analyzer. This -analyzer produces the following tokens: - -[source,text] ------- -[ quick, fox ] ------- - -To execute the search, {es} compares these query string tokens to the tokens -indexed in the `text` field. - -[options="header"] -|=== -|Token | Query string | `text` field -|`quick` | X | X -|`brown` | | X -|`fox` | X | X -|`jump` | | X -|`over` | | X -|`dog` | | X -|=== - -Because the field value and query string were analyzed in the same way, they -created similar tokens. The tokens `quick` and `fox` are exact matches. This -means the search matches the document containing -`"The QUICK brown foxes jumped over the dog!"`, just as the user expects. -==== - -[[different-analyzers]] -==== When to use a different search analyzer - -While less common, it sometimes makes sense to use different analyzers at index -and search time. To enable this, {es} allows you to -<>. - -Generally, a separate search analyzer should only be specified when using the -same form of tokens for field values and query strings would create unexpected -or irrelevant search matches. - -[[different-analyzer-ex]] -.*Example* -[%collapsible] -==== -{es} is used to create a search engine that matches only words that start with -a provided prefix. For instance, a search for `tr` should return `tram` or -`trope`—but never `taxi` or `bat`. - -A document is added to the search engine's index; this document contains one -such word in a `text` field: - -[source,text] ------- -"Apple" ------- - -The index analyzer for the field converts the value into tokens and normalizes -them. In this case, each of the tokens represents a potential prefix for -the word: - -[source,text] ------- -[ a, ap, app, appl, apple] ------- - -These tokens are then indexed. - -Later, a user searches the same `text` field for: - -[source,text] ------- -"appli" ------- - -The user expects this search to match only words that start with `appli`, -such as `appliance` or `application`. The search should not match `apple`. - -However, if the index analyzer is used to analyze this query string, it would -produce the following tokens: - -[source,text] ------- -[ a, ap, app, appl, appli ] ------- - -When {es} compares these query string tokens to the ones indexed for `apple`, -it finds several matches. - -[options="header"] -|=== -|Token | `appli` | `apple` -|`a` | X | X -|`ap` | X | X -|`app` | X | X -|`appl` | X | X -|`appli` | | X -|=== - -This means the search would erroneously match `apple`. Not only that, it would -match any word starting with `a`. - -To fix this, you can specify a different search analyzer for query strings used -on the `text` field. 
- -In this case, you could specify a search analyzer that produces a single token -rather than a set of prefixes: - -[source,text] ------- -[ appli ] ------- - -This query string token would only match tokens for words that start with -`appli`, which better aligns with the user's search expectations. -==== diff --git a/docs/reference/analysis/normalizers.asciidoc b/docs/reference/analysis/normalizers.asciidoc deleted file mode 100644 index 6acd415437525..0000000000000 --- a/docs/reference/analysis/normalizers.asciidoc +++ /dev/null @@ -1,59 +0,0 @@ -[[analysis-normalizers]] -== Normalizers - -Normalizers are similar to analyzers except that they may only emit a single -token. As a consequence, they do not have a tokenizer and only accept a subset -of the available char filters and token filters. Only the filters that work on -a per-character basis are allowed. For instance a lowercasing filter would be -allowed, but not a stemming filter, which needs to look at the keyword as a -whole. The current list of filters that can be used in a normalizer definition -are: `arabic_normalization`, `asciifolding`, `bengali_normalization`, -`cjk_width`, `decimal_digit`, `elision`, `german_normalization`, -`hindi_normalization`, `indic_normalization`, `lowercase`, `pattern_replace`, -`persian_normalization`, `scandinavian_folding`, `serbian_normalization`, -`sorani_normalization`, `trim`, `uppercase`. - -Elasticsearch ships with a `lowercase` built-in normalizer. For other forms of -normalization, a custom configuration is required. - -[discrete] -=== Custom normalizers - -Custom normalizers take a list of -<> and a list of -<>. - -[source,console] --------------------------------- -PUT index -{ - "settings": { - "analysis": { - "char_filter": { - "quote": { - "type": "mapping", - "mappings": [ - "« => \"", - "» => \"" - ] - } - }, - "normalizer": { - "my_normalizer": { - "type": "custom", - "char_filter": ["quote"], - "filter": ["lowercase", "asciifolding"] - } - } - } - }, - "mappings": { - "properties": { - "foo": { - "type": "keyword", - "normalizer": "my_normalizer" - } - } - } -} --------------------------------- diff --git a/docs/reference/analysis/overview.asciidoc b/docs/reference/analysis/overview.asciidoc deleted file mode 100644 index 8cf5a10ae8af5..0000000000000 --- a/docs/reference/analysis/overview.asciidoc +++ /dev/null @@ -1,78 +0,0 @@ -[[analysis-overview]] -== Text analysis overview -++++ -Overview -++++ - -Text analysis enables {es} to perform full-text search, where the search returns -all _relevant_ results rather than just exact matches. - -If you search for `Quick fox jumps`, you probably want the document that -contains `A quick brown fox jumps over the lazy dog`, and you might also want -documents that contain related words like `fast fox` or `foxes leap`. - -[discrete] -[[tokenization]] -=== Tokenization - -Analysis makes full-text search possible through _tokenization_: breaking a text -down into smaller chunks, called _tokens_. In most cases, these tokens are -individual words. - -If you index the phrase `the quick brown fox jumps` as a single string and the -user searches for `quick fox`, it isn't considered a match. However, if you -tokenize the phrase and index each word separately, the terms in the query -string can be looked up individually. This means they can be matched by searches -for `quick fox`, `fox brown`, or other variations. 
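
As a quick illustration (an added sketch, not part of the original text), the `_analyze` API shows the tokens the default `standard` analyzer produces for such a phrase:

[source,console]
----
POST _analyze
{
  "analyzer": "standard",
  "text": "The quick brown fox jumps"
}
----

Each word comes back as its own token, which is what lets a query for `quick fox` look up `quick` and `fox` individually.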
- -[discrete] -[[normalization]] -=== Normalization - -Tokenization enables matching on individual terms, but each token is still -matched literally. This means: - -* A search for `Quick` would not match `quick`, even though you likely want -either term to match the other - -* Although `fox` and `foxes` share the same root word, a search for `foxes` -would not match `fox` or vice versa. - -* A search for `jumps` would not match `leaps`. While they don't share a root -word, they are synonyms and have a similar meaning. - -To solve these problems, text analysis can _normalize_ these tokens into a -standard format. This allows you to match tokens that are not exactly the same -as the search terms, but similar enough to still be relevant. For example: - -* `Quick` can be lowercased: `quick`. - -* `foxes` can be _stemmed_, or reduced to its root word: `fox`. - -* `jump` and `leap` are synonyms and can be indexed as a single word: `jump`. - -To ensure search terms match these words as intended, you can apply the same -tokenization and normalization rules to the query string. For example, a search -for `Foxes leap` can be normalized to a search for `fox jump`. - -[discrete] -[[analysis-customization]] -=== Customize text analysis - -Text analysis is performed by an <>, a set of rules -that govern the entire process. - -{es} includes a default analyzer, called the -<>, which works well for most use -cases right out of the box. - -If you want to tailor your search experience, you can choose a different -<> or even -<>. A custom analyzer gives you -control over each step of the analysis process, including: - -* Changes to the text _before_ tokenization - -* How text is converted to tokens - -* Normalization changes made to tokens before indexing or search \ No newline at end of file diff --git a/docs/reference/analysis/specify-analyzer.asciidoc b/docs/reference/analysis/specify-analyzer.asciidoc deleted file mode 100644 index d3114a74984e6..0000000000000 --- a/docs/reference/analysis/specify-analyzer.asciidoc +++ /dev/null @@ -1,202 +0,0 @@ -[[specify-analyzer]] -=== Specify an analyzer - -{es} offers a variety of ways to specify built-in or custom analyzers: - -* By `text` field, index, or query -* For <> - -[TIP] -.Keep it simple -==== -The flexibility to specify analyzers at different levels and for different times -is great... _but only when it's needed_. - -In most cases, a simple approach works best: Specify an analyzer for each -`text` field, as outlined in <>. - -This approach works well with {es}'s default behavior, letting you use the same -analyzer for indexing and search. It also lets you quickly see which analyzer -applies to which field using the <>. - -If you don't typically create mappings for your indices, you can use -<> to achieve a similar effect. -==== - -[[specify-index-time-analyzer]] -==== How {es} determines the index analyzer - -{es} determines which index analyzer to use by checking the following parameters -in order: - -. The <> mapping parameter for the field. - See <>. -. The `analysis.analyzer.default` index setting. - See <>. - -If none of these parameters are specified, the -<> is used. - -[[specify-index-field-analyzer]] -==== Specify the analyzer for a field - -When mapping an index, you can use the <> mapping parameter -to specify an analyzer for each `text` field. - -The following <> request sets the -`whitespace` analyzer as the analyzer for the `title` field. 
- -[source,console] ----- -PUT my-index-000001 -{ - "mappings": { - "properties": { - "title": { - "type": "text", - "analyzer": "whitespace" - } - } - } -} ----- - -[[specify-index-time-default-analyzer]] -==== Specify the default analyzer for an index - -In addition to a field-level analyzer, you can set a fallback analyzer for -using the `analysis.analyzer.default` setting. - -The following <> request sets the -`simple` analyzer as the fallback analyzer for `my-index-000001`. - -[source,console] ----- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "default": { - "type": "simple" - } - } - } - } -} ----- - -[[specify-search-analyzer]] -==== How {es} determines the search analyzer - -// tag::search-analyzer-warning[] -[WARNING] -==== -In most cases, specifying a different search analyzer is unnecessary. Doing so -could negatively impact relevancy and result in unexpected search results. - -If you choose to specify a separate search analyzer, we recommend you thoroughly -<> before deploying in -production. -==== -// end::search-analyzer-warning[] - -At search time, {es} determines which analyzer to use by checking the following -parameters in order: - -. The <> parameter in the search query. - See <>. -. The <> mapping parameter for the field. - See <>. -. The `analysis.analyzer.default_search` index setting. - See <>. -. The <> mapping parameter for the field. - See <>. - -If none of these parameters are specified, the -<> is used. - -[[specify-search-query-analyzer]] -==== Specify the search analyzer for a query - -When writing a <>, you can use the `analyzer` -parameter to specify a search analyzer. If provided, this overrides any other -search analyzers. - -The following <> request sets the `stop` analyzer as -the search analyzer for a <> query. - -[source,console] ----- -GET my-index-000001/_search -{ - "query": { - "match": { - "message": { - "query": "Quick foxes", - "analyzer": "stop" - } - } - } -} ----- -// TEST[s/^/PUT my-index-000001\n/] - -[[specify-search-field-analyzer]] -==== Specify the search analyzer for a field - -When mapping an index, you can use the <> mapping -parameter to specify a search analyzer for each `text` field. - -If a search analyzer is provided, the index analyzer must also be specified -using the `analyzer` parameter. - -The following <> request sets the -`simple` analyzer as the search analyzer for the `title` field. - -[source,console] ----- -PUT my-index-000001 -{ - "mappings": { - "properties": { - "title": { - "type": "text", - "analyzer": "whitespace", - "search_analyzer": "simple" - } - } - } -} ----- - -[[specify-search-default-analyzer]] -==== Specify the default search analyzer for an index - -When <>, you can set a default search -analyzer using the `analysis.analyzer.default_search` setting. - -If a search analyzer is provided, a default index analyzer must also be -specified using the `analysis.analyzer.default` setting. - -The following <> request sets the -`whitespace` analyzer as the default search analyzer for the `my-index-000001` index. 
- -[source,console] ----- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "default": { - "type": "simple" - }, - "default_search": { - "type": "whitespace" - } - } - } - } -} ----- diff --git a/docs/reference/analysis/stemming.asciidoc b/docs/reference/analysis/stemming.asciidoc deleted file mode 100644 index 6e92348f98752..0000000000000 --- a/docs/reference/analysis/stemming.asciidoc +++ /dev/null @@ -1,125 +0,0 @@ -[[stemming]] -=== Stemming - -_Stemming_ is the process of reducing a word to its root form. This ensures -variants of a word match during a search. - -For example, `walking` and `walked` can be stemmed to the same root word: -`walk`. Once stemmed, an occurrence of either word would match the other in a -search. - -Stemming is language-dependent but often involves removing prefixes and -suffixes from words. - -In some cases, the root form of a stemmed word may not be a real word. For -example, `jumping` and `jumpiness` can both be stemmed to `jumpi`. While `jumpi` -isn't a real English word, it doesn't matter for search; if all variants of a -word are reduced to the same root form, they will match correctly. - -[[stemmer-token-filters]] -==== Stemmer token filters - -In {es}, stemming is handled by stemmer <>. These token filters can be categorized based on how they stem words: - -* <>, which stem words based on a set -of rules -* <>, which stem words by looking them -up in a dictionary - -Because stemming changes tokens, we recommend using the same stemmer token -filters during <>. - -[[algorithmic-stemmers]] -==== Algorithmic stemmers - -Algorithmic stemmers apply a series of rules to each word to reduce it to its -root form. For example, an algorithmic stemmer for English may remove the `-s` -and `-es` suffixes from the end of plural words. - -Algorithmic stemmers have a few advantages: - -* They require little setup and usually work well out of the box. -* They use little memory. -* They are typically faster than <>. - -However, most algorithmic stemmers only alter the existing text of a word. This -means they may not work well with irregular words that don't contain their root -form, such as: - -* `be`, `are`, and `am` -* `mouse` and `mice` -* `foot` and `feet` - -The following token filters use algorithmic stemming: - -* <>, which provides algorithmic -stemming for several languages, some with additional variants. -* <>, a stemmer for English that combines -algorithmic stemming with a built-in dictionary. -* <>, our recommended algorithmic -stemmer for English. -* <>, which uses -https://snowballstem.org/[Snowball]-based stemming rules for several -languages. - -[[dictionary-stemmers]] -==== Dictionary stemmers - -Dictionary stemmers look up words in a provided dictionary, replacing unstemmed -word variants with stemmed words from the dictionary. - -In theory, dictionary stemmers are well suited for: - -* Stemming irregular words -* Discerning between words that are spelled similarly but not related -conceptually, such as: -** `organ` and `organization` -** `broker` and `broken` - -In practice, algorithmic stemmers typically outperform dictionary stemmers. This -is because dictionary stemmers have the following disadvantages: - -* *Dictionary quality* + -A dictionary stemmer is only as good as its dictionary. To work well, these -dictionaries must include a significant number of words, be updated regularly, -and change with language trends. 
Often, by the time a dictionary has been made -available, it's incomplete and some of its entries are already outdated. - -* *Size and performance* + -Dictionary stemmers must load all words, prefixes, and suffixes from its -dictionary into memory. This can use a significant amount of RAM. Low-quality -dictionaries may also be less efficient with prefix and suffix removal, which -can slow the stemming process significantly. - -You can use the <> token filter to -perform dictionary stemming. - -[TIP] -==== -If available, we recommend trying an algorithmic stemmer for your language -before using the <> token filter. -==== - -[[control-stemming]] -==== Control stemming - -Sometimes stemming can produce shared root words that are spelled similarly but -not related conceptually. For example, a stemmer may reduce both `skies` and -`skiing` to the same root word: `ski`. - -To prevent this and better control stemming, you can use the following token -filters: - -* <>, which lets you -define rules for stemming specific tokens. -* <>, which marks -specified tokens as keywords. Keyword tokens are not stemmed by subsequent -stemmer token filters. -* <>, which can be used to mark -tokens as keywords, similar to the `keyword_marker` filter. - - -For built-in <>, you also can use the -<<_excluding_words_from_stemming,`stem_exclusion`>> parameter to specify a list -of words that won't be stemmed. diff --git a/docs/reference/analysis/testing.asciidoc b/docs/reference/analysis/testing.asciidoc deleted file mode 100644 index a430fb18a05e6..0000000000000 --- a/docs/reference/analysis/testing.asciidoc +++ /dev/null @@ -1,207 +0,0 @@ -[[test-analyzer]] -=== Test an analyzer - -The <> is an invaluable tool for viewing the -terms produced by an analyzer. A built-in analyzer can be specified inline in -the request: - -[source,console] -------------------------------------- -POST _analyze -{ - "analyzer": "whitespace", - "text": "The quick brown fox." -} -------------------------------------- - -The API returns the following response: - -[source,console-result] -------------------------------------- -{ - "tokens": [ - { - "token": "The", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "quick", - "start_offset": 4, - "end_offset": 9, - "type": "word", - "position": 1 - }, - { - "token": "brown", - "start_offset": 10, - "end_offset": 15, - "type": "word", - "position": 2 - }, - { - "token": "fox.", - "start_offset": 16, - "end_offset": 20, - "type": "word", - "position": 3 - } - ] -} -------------------------------------- - -You can also test combinations of: - -* A tokenizer -* Zero or more token filters -* Zero or more character filters - -[source,console] -------------------------------------- -POST _analyze -{ - "tokenizer": "standard", - "filter": [ "lowercase", "asciifolding" ], - "text": "Is this déja vu?" 
-} -------------------------------------- - -The API returns the following response: - -[source,console-result] -------------------------------------- -{ - "tokens": [ - { - "token": "is", - "start_offset": 0, - "end_offset": 2, - "type": "", - "position": 0 - }, - { - "token": "this", - "start_offset": 3, - "end_offset": 7, - "type": "", - "position": 1 - }, - { - "token": "deja", - "start_offset": 8, - "end_offset": 12, - "type": "", - "position": 2 - }, - { - "token": "vu", - "start_offset": 13, - "end_offset": 15, - "type": "", - "position": 3 - } - ] -} -------------------------------------- - -.Positions and character offsets -********************************************************* - -As can be seen from the output of the `analyze` API, analyzers not only -convert words into terms, they also record the order or relative _positions_ -of each term (used for phrase queries or word proximity queries), and the -start and end _character offsets_ of each term in the original text (used for -highlighting search snippets). - -********************************************************* - - -Alternatively, a <> can be -referred to when running the `analyze` API on a specific index: - -[source,console] -------------------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "std_folded": { <1> - "type": "custom", - "tokenizer": "standard", - "filter": [ - "lowercase", - "asciifolding" - ] - } - } - } - }, - "mappings": { - "properties": { - "my_text": { - "type": "text", - "analyzer": "std_folded" <2> - } - } - } -} - -GET my-index-000001/_analyze <3> -{ - "analyzer": "std_folded", <4> - "text": "Is this déjà vu?" -} - -GET my-index-000001/_analyze <3> -{ - "field": "my_text", <5> - "text": "Is this déjà vu?" -} -------------------------------------- - -The API returns the following response: - -[source,console-result] -------------------------------------- -{ - "tokens": [ - { - "token": "is", - "start_offset": 0, - "end_offset": 2, - "type": "", - "position": 0 - }, - { - "token": "this", - "start_offset": 3, - "end_offset": 7, - "type": "", - "position": 1 - }, - { - "token": "deja", - "start_offset": 8, - "end_offset": 12, - "type": "", - "position": 2 - }, - { - "token": "vu", - "start_offset": 13, - "end_offset": 15, - "type": "", - "position": 3 - } - ] -} -------------------------------------- - -<1> Define a `custom` analyzer called `std_folded`. -<2> The field `my_text` uses the `std_folded` analyzer. -<3> To refer to this analyzer, the `analyze` API must specify the index name. -<4> Refer to the analyzer by name. -<5> Refer to the analyzer used by field `my_text`. diff --git a/docs/reference/analysis/token-graphs.asciidoc b/docs/reference/analysis/token-graphs.asciidoc deleted file mode 100644 index 55d69695bd62b..0000000000000 --- a/docs/reference/analysis/token-graphs.asciidoc +++ /dev/null @@ -1,108 +0,0 @@ -[[token-graphs]] -=== Token graphs - -When a <> converts a text into a stream of -tokens, it also records the following: - -* The `position` of each token in the stream -* The `positionLength`, the number of positions that a token spans - -Using these, you can create a -{wikipedia}/Directed_acyclic_graph[directed acyclic graph], -called a _token graph_, for a stream. In a token graph, each position represents -a node. Each token represents an edge or arc, pointing to the next position. 
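
As an illustrative aside (not part of the original text), these attributes can be inspected directly with the `_analyze` API by setting `explain` to `true`; the detailed output should list each token's `position` along with additional attributes such as `positionLength`.

[source,console]
----
GET /_analyze
{
  "tokenizer": "standard",
  "text": "quick brown fox",
  "explain": true                 <1>
}
----
<1> `explain` switches to the detailed output, which includes per-token attributes beyond the basic offsets and positions.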
- -image::images/analysis/token-graph-qbf-ex.svg[align="center"] - -[[token-graphs-synonyms]] -==== Synonyms - -Some <> can add new tokens, like -synonyms, to an existing token stream. These synonyms often span the same -positions as existing tokens. - -In the following graph, `quick` and its synonym `fast` both have a position of -`0`. They span the same positions. - -image::images/analysis/token-graph-qbf-synonym-ex.svg[align="center"] - -[[token-graphs-multi-position-tokens]] -==== Multi-position tokens - -Some token filters can add tokens that span multiple positions. These can -include tokens for multi-word synonyms, such as using "atm" as a synonym for -"automatic teller machine." - -However, only some token filters, known as _graph token filters_, accurately -record the `positionLength` for multi-position tokens. These filters include: - -* <> -* <> - -Some tokenizers, such as the -{plugins}/analysis-nori-tokenizer.html[`nori_tokenizer`], also accurately -decompose compound tokens into multi-position tokens. - -In the following graph, `domain name system` and its synonym, `dns`, both have a -position of `0`. However, `dns` has a `positionLength` of `3`. Other tokens in -the graph have a default `positionLength` of `1`. - -image::images/analysis/token-graph-dns-synonym-ex.svg[align="center"] - -[[token-graphs-token-graphs-search]] -===== Using token graphs for search - -<> ignores the `positionLength` attribute -and does not support token graphs containing multi-position tokens. - -However, queries, such as the <> or -<> query, can use these graphs to -generate multiple sub-queries from a single query string. - -.*Example* -[%collapsible] -==== - -A user runs a search for the following phrase using the `match_phrase` query: - -`domain name system is fragile` - -During <>, `dns`, a synonym for -`domain name system`, is added to the query string's token stream. The `dns` -token has a `positionLength` of `3`. - -image::images/analysis/token-graph-dns-synonym-ex.svg[align="center"] - -The `match_phrase` query uses this graph to generate sub-queries for the -following phrases: - -[source,text] ------- -dns is fragile -domain name system is fragile ------- - -This means the query matches documents containing either `dns is fragile` _or_ -`domain name system is fragile`. -==== - -[[token-graphs-invalid-token-graphs]] -===== Invalid token graphs - -The following token filters can add tokens that span multiple positions but -only record a default `positionLength` of `1`: - -* <> -* <> - -This means these filters will produce invalid token graphs for streams -containing such tokens. - -In the following graph, `dns` is a multi-position synonym for `domain name -system`. However, `dns` has the default `positionLength` value of `1`, resulting -in an invalid graph. - -image::images/analysis/token-graph-dns-invalid-ex.svg[align="center"] - -Avoid using invalid token graphs for search. Invalid graphs can cause unexpected -search results. diff --git a/docs/reference/analysis/tokenfilters.asciidoc b/docs/reference/analysis/tokenfilters.asciidoc deleted file mode 100644 index 46cd0347b72fd..0000000000000 --- a/docs/reference/analysis/tokenfilters.asciidoc +++ /dev/null @@ -1,107 +0,0 @@ -[[analysis-tokenfilters]] -== Token filter reference - -Token filters accept a stream of tokens from a -<> and can modify tokens -(eg lowercasing), delete tokens (eg remove stopwords) -or add tokens (eg synonyms). - -{es} has a number of built-in token filters you can use -to build <>. 
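
As a small combined sketch (an illustrative addition, not part of the original reference), a single `_analyze` request can chain filters that do all three things; the inline synonym rule is only an assumption for the example.

[source,console]
----
GET /_analyze
{
  "tokenizer": "standard",
  "filter": [
    "lowercase",                        <1>
    "stop",                             <2>
    {
      "type": "synonym",
      "synonyms": [ "quick, fast" ]     <3>
    }
  ],
  "text": "The QUICK brown fox"
}
----
<1> Modifies tokens: `QUICK` becomes `quick`.
<2> Deletes tokens: the stopword `the` is removed.
<3> Adds tokens: `fast` is emitted alongside `quick`.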
- - -include::tokenfilters/apostrophe-tokenfilter.asciidoc[] - -include::tokenfilters/asciifolding-tokenfilter.asciidoc[] - -include::tokenfilters/cjk-bigram-tokenfilter.asciidoc[] - -include::tokenfilters/cjk-width-tokenfilter.asciidoc[] - -include::tokenfilters/classic-tokenfilter.asciidoc[] - -include::tokenfilters/common-grams-tokenfilter.asciidoc[] - -include::tokenfilters/condition-tokenfilter.asciidoc[] - -include::tokenfilters/decimal-digit-tokenfilter.asciidoc[] - -include::tokenfilters/delimited-payload-tokenfilter.asciidoc[] - -include::tokenfilters/dictionary-decompounder-tokenfilter.asciidoc[] - -include::tokenfilters/edgengram-tokenfilter.asciidoc[] - -include::tokenfilters/elision-tokenfilter.asciidoc[] - -include::tokenfilters/fingerprint-tokenfilter.asciidoc[] - -include::tokenfilters/flatten-graph-tokenfilter.asciidoc[] - -include::tokenfilters/hunspell-tokenfilter.asciidoc[] - -include::tokenfilters/hyphenation-decompounder-tokenfilter.asciidoc[] - -include::tokenfilters/keep-types-tokenfilter.asciidoc[] - -include::tokenfilters/keep-words-tokenfilter.asciidoc[] - -include::tokenfilters/keyword-marker-tokenfilter.asciidoc[] - -include::tokenfilters/keyword-repeat-tokenfilter.asciidoc[] - -include::tokenfilters/kstem-tokenfilter.asciidoc[] - -include::tokenfilters/length-tokenfilter.asciidoc[] - -include::tokenfilters/limit-token-count-tokenfilter.asciidoc[] - -include::tokenfilters/lowercase-tokenfilter.asciidoc[] - -include::tokenfilters/minhash-tokenfilter.asciidoc[] - -include::tokenfilters/multiplexer-tokenfilter.asciidoc[] - -include::tokenfilters/ngram-tokenfilter.asciidoc[] - -include::tokenfilters/normalization-tokenfilter.asciidoc[] - -include::tokenfilters/pattern-capture-tokenfilter.asciidoc[] - -include::tokenfilters/pattern_replace-tokenfilter.asciidoc[] - -include::tokenfilters/phonetic-tokenfilter.asciidoc[] - -include::tokenfilters/porterstem-tokenfilter.asciidoc[] - -include::tokenfilters/predicate-tokenfilter.asciidoc[] - -include::tokenfilters/remove-duplicates-tokenfilter.asciidoc[] - -include::tokenfilters/reverse-tokenfilter.asciidoc[] - -include::tokenfilters/shingle-tokenfilter.asciidoc[] - -include::tokenfilters/snowball-tokenfilter.asciidoc[] - -include::tokenfilters/stemmer-tokenfilter.asciidoc[] - -include::tokenfilters/stemmer-override-tokenfilter.asciidoc[] - -include::tokenfilters/stop-tokenfilter.asciidoc[] - -include::tokenfilters/synonym-tokenfilter.asciidoc[] - -include::tokenfilters/synonym-graph-tokenfilter.asciidoc[] - -include::tokenfilters/trim-tokenfilter.asciidoc[] - -include::tokenfilters/truncate-tokenfilter.asciidoc[] - -include::tokenfilters/unique-tokenfilter.asciidoc[] - -include::tokenfilters/uppercase-tokenfilter.asciidoc[] - -include::tokenfilters/word-delimiter-tokenfilter.asciidoc[] - -include::tokenfilters/word-delimiter-graph-tokenfilter.asciidoc[] diff --git a/docs/reference/analysis/tokenfilters/_token-filter-template.asciidoc b/docs/reference/analysis/tokenfilters/_token-filter-template.asciidoc deleted file mode 100644 index c2558d0b91bd8..0000000000000 --- a/docs/reference/analysis/tokenfilters/_token-filter-template.asciidoc +++ /dev/null @@ -1,233 +0,0 @@ -//// -This is a template for token filter reference documentation. - -To document a new token filter, copy this file, remove comments like this, and -replace "sample" with the appropriate filter name. 
- -Ensure the new filter docs are linked and included in -docs/reference/analysis/tokefilters.asciidoc -//// - -[[sample-tokenfilter]] -=== Sample token filter -++++ -Sample -++++ - -//// -INTRO -Include a brief, 1-2 sentence description. -If based on a Lucene token filter, link to the Lucene documentation. -//// - -Does a cool thing. For example, the `sample` filter changes `x` to `y`. - -The filter uses Lucene's -{lucene-analysis-docs}/sampleFilter.html[SampleFilter]. - -[[analysis-sample-tokenfilter-analyze-ex]] -==== Example -//// -Basic example of the filter's input and output token streams. - -Guidelines -*************************************** -* The _analyze API response should be included but commented out. -* Ensure // TEST[skip:...] comments are removed. -*************************************** -//// - -The following <> request uses the `sample` -filter to do a cool thing to `the quick fox jumps the lazy dog`: - -[source,console] ----- -GET /_analyze -{ - "tokenizer" : "standard", - "filter" : ["sample"], - "text" : "the quick fox jumps the lazy dog" -} ----- -// TEST[skip: REMOVE THIS COMMENT.] - -The filter produces the following tokens: - -[source,text] ----- -[ the, quick, fox, jumps, the, lazy, dog ] ----- - -//// -[source,console-result] ----- -{ - "tokens" : [ - { - "token" : "the", - "start_offset" : 0, - "end_offset" : 3, - "type" : "", - "position" : 0 - }, - { - "token" : "quick", - "start_offset" : 4, - "end_offset" : 9, - "type" : "", - "position" : 1 - }, - { - "token" : "fox", - "start_offset" : 10, - "end_offset" : 13, - "type" : "", - "position" : 2 - }, - { - "token" : "jumps", - "start_offset" : 14, - "end_offset" : 19, - "type" : "", - "position" : 3 - }, - { - "token" : "over", - "start_offset" : 20, - "end_offset" : 24, - "type" : "", - "position" : 4 - }, - { - "token" : "the", - "start_offset" : 25, - "end_offset" : 28, - "type" : "", - "position" : 5 - }, - { - "token" : "lazy", - "start_offset" : 29, - "end_offset" : 33, - "type" : "", - "position" : 6 - }, - { - "token" : "dog", - "start_offset" : 34, - "end_offset" : 37, - "type" : "", - "position" : 7 - } - ] -} ----- -// TEST[skip: REMOVE THIS COMMENT.] -//// - -[[analysis-sample-tokenfilter-analyzer-ex]] -==== Add to an analyzer -//// -Example of how to add a pre-configured token filter to an analyzer. -If the filter requires arguments, skip this section. - -Guidelines -*************************************** -* If needed, change the tokenizer so the example fits the filter. -* Ensure // TEST[skip:...] comments are removed. -*************************************** -//// - -The following <> request uses the -`sample` filter to configure a new <>. - -[source,console] ----- -PUT sample_example -{ - "settings": { - "analysis": { - "analyzer": { - "my_sample_analyzer": { - "tokenizer": "standard", - "filter": [ "sample" ] - } - } - } - } -} ----- -// TEST[skip: REMOVE THIS COMMENT.] - - -[[analysis-sample-tokenfilter-configure-parms]] -==== Configurable parameters -//// -Documents each parameter for the token filter. -If the filter does not have any configurable parameters, skip this section. - -Guidelines -*************************************** -* Use a definition list. -* End each definition with a period. -* Include whether the parameter is Optional or Required and the data type. -* Include default values as the last sentence of the first paragraph. -* Include a range of valid values, if applicable. -* If the parameter requires a specific delimiter for multiple values, say so. 
-* If the parameter supports wildcards, ditto. -* For large or nested objects, consider linking to a separate definition list. -*************************************** -//// - -`foo`:: -(Optional, Boolean) -If `true`, do a cool thing. -Defaults to `false`. - -`baz`:: -(Optional, string) -Path to another cool thing. - -[[analysis-sample-tokenfilter-customize]] -==== Customize -//// -Example of a custom token filter with configurable parameters. -If the filter does not have any configurable parameters, skip this section. - -Guidelines -*************************************** -* If able, use a different tokenizer than used in "Add to an analyzer." -* Ensure // TEST[skip:...] comments are removed. -*************************************** -//// - -To customize the `sample` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a custom `sample` filter with -`foo` set to `true`: - -[source,console] ----- -PUT sample_example -{ - "settings": { - "analysis": { - "analyzer": { - "my_custom_analyzer": { - "tokenizer": "whitespace", - "filter": [ "my_custom_sample_token_filter" ] - } - }, - "filter": { - "my_custom_sample_token_filter": { - "type": "sample", - "foo": true - } - } - } - } -} ----- -// TEST[skip: REMOVE THIS COMMENT.] diff --git a/docs/reference/analysis/tokenfilters/apostrophe-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/apostrophe-tokenfilter.asciidoc deleted file mode 100644 index 49c75e47af0aa..0000000000000 --- a/docs/reference/analysis/tokenfilters/apostrophe-tokenfilter.asciidoc +++ /dev/null @@ -1,91 +0,0 @@ -[[analysis-apostrophe-tokenfilter]] -=== Apostrophe token filter -++++ -Apostrophe -++++ - -Strips all characters after an apostrophe, including the apostrophe itself. - -This filter is included in {es}'s built-in <>. It uses Lucene's -{lucene-analysis-docs}/tr/ApostropheFilter.html[ApostropheFilter], which was -built for the Turkish language. - - -[[analysis-apostrophe-tokenfilter-analyze-ex]] -==== Example - -The following <> request demonstrates how the -apostrophe token filter works. - -[source,console] --------------------------------------------------- -GET /_analyze -{ - "tokenizer" : "standard", - "filter" : ["apostrophe"], - "text" : "Istanbul'a veya Istanbul'dan" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ Istanbul, veya, Istanbul ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "Istanbul", - "start_offset" : 0, - "end_offset" : 10, - "type" : "", - "position" : 0 - }, - { - "token" : "veya", - "start_offset" : 11, - "end_offset" : 15, - "type" : "", - "position" : 1 - }, - { - "token" : "Istanbul", - "start_offset" : 16, - "end_offset" : 28, - "type" : "", - "position" : 2 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-apostrophe-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -apostrophe token filter to configure a new -<>. 
- -[source,console] --------------------------------------------------- -PUT /apostrophe_example -{ - "settings": { - "analysis": { - "analyzer": { - "standard_apostrophe": { - "tokenizer": "standard", - "filter": [ "apostrophe" ] - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/asciifolding-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/asciifolding-tokenfilter.asciidoc deleted file mode 100644 index 6b836f26dadfd..0000000000000 --- a/docs/reference/analysis/tokenfilters/asciifolding-tokenfilter.asciidoc +++ /dev/null @@ -1,138 +0,0 @@ -[[analysis-asciifolding-tokenfilter]] -=== ASCII folding token filter -++++ -ASCII folding -++++ - -Converts alphabetic, numeric, and symbolic characters that are not in the Basic -Latin Unicode block (first 127 ASCII characters) to their ASCII equivalent, if -one exists. For example, the filter changes `à` to `a`. - -This filter uses Lucene's -{lucene-analysis-docs}/miscellaneous/ASCIIFoldingFilter.html[ASCIIFoldingFilter]. - -[[analysis-asciifolding-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `asciifolding` -filter to drop the diacritical marks in `açaí à la carte`: - -[source,console] --------------------------------------------------- -GET /_analyze -{ - "tokenizer" : "standard", - "filter" : ["asciifolding"], - "text" : "açaí à la carte" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ acai, a, la, carte ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "acai", - "start_offset" : 0, - "end_offset" : 4, - "type" : "", - "position" : 0 - }, - { - "token" : "a", - "start_offset" : 5, - "end_offset" : 6, - "type" : "", - "position" : 1 - }, - { - "token" : "la", - "start_offset" : 7, - "end_offset" : 9, - "type" : "", - "position" : 2 - }, - { - "token" : "carte", - "start_offset" : 10, - "end_offset" : 15, - "type" : "", - "position" : 3 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-asciifolding-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`asciifolding` filter to configure a new -<>. - -[source,console] --------------------------------------------------- -PUT /asciifold_example -{ - "settings": { - "analysis": { - "analyzer": { - "standard_asciifolding": { - "tokenizer": "standard", - "filter": [ "asciifolding" ] - } - } - } - } -} --------------------------------------------------- - -[[analysis-asciifolding-tokenfilter-configure-parms]] -==== Configurable parameters - -`preserve_original`:: -(Optional, Boolean) -If `true`, emit both original tokens and folded tokens. -Defaults to `false`. - -[[analysis-asciifolding-tokenfilter-customize]] -==== Customize - -To customize the `asciifolding` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. 
- -For example, the following request creates a custom `asciifolding` filter with -`preserve_original` set to true: - -[source,console] --------------------------------------------------- -PUT /asciifold_example -{ - "settings": { - "analysis": { - "analyzer": { - "standard_asciifolding": { - "tokenizer": "standard", - "filter": [ "my_ascii_folding" ] - } - }, - "filter": { - "my_ascii_folding": { - "type": "asciifolding", - "preserve_original": true - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/cjk-bigram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/cjk-bigram-tokenfilter.asciidoc deleted file mode 100644 index ca7ced2239811..0000000000000 --- a/docs/reference/analysis/tokenfilters/cjk-bigram-tokenfilter.asciidoc +++ /dev/null @@ -1,201 +0,0 @@ -[[analysis-cjk-bigram-tokenfilter]] -=== CJK bigram token filter -++++ -CJK bigram -++++ - -Forms {wikipedia}/Bigram[bigrams] out of CJK (Chinese, -Japanese, and Korean) tokens. - -This filter is included in {es}'s built-in <>. It uses Lucene's -{lucene-analysis-docs}/cjk/CJKBigramFilter.html[CJKBigramFilter]. - - -[[analysis-cjk-bigram-tokenfilter-analyze-ex]] -==== Example - -The following <> request demonstrates how the -CJK bigram token filter works. - -[source,console] --------------------------------------------------- -GET /_analyze -{ - "tokenizer" : "standard", - "filter" : ["cjk_bigram"], - "text" : "東京都は、日本の首都であり" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ 東京, 京都, 都は, 日本, 本の, の首, 首都, 都で, であ, あり ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "東京", - "start_offset" : 0, - "end_offset" : 2, - "type" : "", - "position" : 0 - }, - { - "token" : "京都", - "start_offset" : 1, - "end_offset" : 3, - "type" : "", - "position" : 1 - }, - { - "token" : "都は", - "start_offset" : 2, - "end_offset" : 4, - "type" : "", - "position" : 2 - }, - { - "token" : "日本", - "start_offset" : 5, - "end_offset" : 7, - "type" : "", - "position" : 3 - }, - { - "token" : "本の", - "start_offset" : 6, - "end_offset" : 8, - "type" : "", - "position" : 4 - }, - { - "token" : "の首", - "start_offset" : 7, - "end_offset" : 9, - "type" : "", - "position" : 5 - }, - { - "token" : "首都", - "start_offset" : 8, - "end_offset" : 10, - "type" : "", - "position" : 6 - }, - { - "token" : "都で", - "start_offset" : 9, - "end_offset" : 11, - "type" : "", - "position" : 7 - }, - { - "token" : "であ", - "start_offset" : 10, - "end_offset" : 12, - "type" : "", - "position" : 8 - }, - { - "token" : "あり", - "start_offset" : 11, - "end_offset" : 13, - "type" : "", - "position" : 9 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-cjk-bigram-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -CJK bigram token filter to configure a new -<>. 
- -[source,console] --------------------------------------------------- -PUT /cjk_bigram_example -{ - "settings": { - "analysis": { - "analyzer": { - "standard_cjk_bigram": { - "tokenizer": "standard", - "filter": [ "cjk_bigram" ] - } - } - } - } -} --------------------------------------------------- - - -[[analysis-cjk-bigram-tokenfilter-configure-parms]] -==== Configurable parameters - -`ignored_scripts`:: -+ --- -(Optional, array of character scripts) -Array of character scripts for which to disable bigrams. -Possible values: - -* `han` -* `hangul` -* `hiragana` -* `katakana` - -All non-CJK input is passed through unmodified. --- - -`output_unigrams`:: -(Optional, Boolean) -If `true`, emit tokens in both bigram and -{wikipedia}/N-gram[unigram] form. If `false`, a CJK character -is output in unigram form when it has no adjacent characters. Defaults to -`false`. - -[[analysis-cjk-bigram-tokenfilter-customize]] -==== Customize - -To customize the CJK bigram token filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -[source,console] --------------------------------------------------- -PUT /cjk_bigram_example -{ - "settings": { - "analysis": { - "analyzer": { - "han_bigrams": { - "tokenizer": "standard", - "filter": [ "han_bigrams_filter" ] - } - }, - "filter": { - "han_bigrams_filter": { - "type": "cjk_bigram", - "ignored_scripts": [ - "hangul", - "hiragana", - "katakana" - ], - "output_unigrams": true - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/cjk-width-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/cjk-width-tokenfilter.asciidoc deleted file mode 100644 index e055d1783d4f6..0000000000000 --- a/docs/reference/analysis/tokenfilters/cjk-width-tokenfilter.asciidoc +++ /dev/null @@ -1,83 +0,0 @@ -[[analysis-cjk-width-tokenfilter]] -=== CJK width token filter -++++ -CJK width -++++ - -Normalizes width differences in CJK (Chinese, Japanese, and Korean) characters -as follows: - -* Folds full-width ASCII character variants into the equivalent basic Latin -characters -* Folds half-width Katakana character variants into the equivalent Kana -characters - -This filter is included in {es}'s built-in <>. It uses Lucene's -{lucene-analysis-docs}/cjk/CJKWidthFilter.html[CJKWidthFilter]. - -NOTE: This token filter can be viewed as a subset of NFKC/NFKD Unicode -normalization. See the -{plugins}/analysis-icu-normalization-charfilter.html[`analysis-icu` plugin] for -full normalization support. - -[[analysis-cjk-width-tokenfilter-analyze-ex]] -==== Example - -[source,console] --------------------------------------------------- -GET /_analyze -{ - "tokenizer" : "standard", - "filter" : ["cjk_width"], - "text" : "シーサイドライナー" -} --------------------------------------------------- - -The filter produces the following token: - -[source,text] --------------------------------------------------- -シーサイドライナー --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "シーサイドライナー", - "start_offset" : 0, - "end_offset" : 10, - "type" : "", - "position" : 0 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-cjk-width-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -CJK width token filter to configure a new -<>. 
- -[source,console] --------------------------------------------------- -PUT /cjk_width_example -{ - "settings": { - "analysis": { - "analyzer": { - "standard_cjk_width": { - "tokenizer": "standard", - "filter": [ "cjk_width" ] - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/classic-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/classic-tokenfilter.asciidoc deleted file mode 100644 index 8bab797a7506d..0000000000000 --- a/docs/reference/analysis/tokenfilters/classic-tokenfilter.asciidoc +++ /dev/null @@ -1,147 +0,0 @@ -[[analysis-classic-tokenfilter]] -=== Classic token filter -++++ -Classic -++++ - -Performs optional post-processing of terms generated by the -<>. - -This filter removes the english possessive (`'s`) from the end of words and -removes dots from acronyms. It uses Lucene's -{lucene-analysis-docs}/standard/ClassicFilter.html[ClassicFilter]. - -[[analysis-classic-tokenfilter-analyze-ex]] -==== Example - -The following <> request demonstrates how the -classic token filter works. - -[source,console] --------------------------------------------------- -GET /_analyze -{ - "tokenizer" : "classic", - "filter" : ["classic"], - "text" : "The 2 Q.U.I.C.K. Brown-Foxes jumped over the lazy dog's bone." -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ The, 2, QUICK, Brown, Foxes, jumped, over, the, lazy, dog, bone ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "The", - "start_offset" : 0, - "end_offset" : 3, - "type" : "", - "position" : 0 - }, - { - "token" : "2", - "start_offset" : 4, - "end_offset" : 5, - "type" : "", - "position" : 1 - }, - { - "token" : "QUICK", - "start_offset" : 6, - "end_offset" : 16, - "type" : "", - "position" : 2 - }, - { - "token" : "Brown", - "start_offset" : 17, - "end_offset" : 22, - "type" : "", - "position" : 3 - }, - { - "token" : "Foxes", - "start_offset" : 23, - "end_offset" : 28, - "type" : "", - "position" : 4 - }, - { - "token" : "jumped", - "start_offset" : 29, - "end_offset" : 35, - "type" : "", - "position" : 5 - }, - { - "token" : "over", - "start_offset" : 36, - "end_offset" : 40, - "type" : "", - "position" : 6 - }, - { - "token" : "the", - "start_offset" : 41, - "end_offset" : 44, - "type" : "", - "position" : 7 - }, - { - "token" : "lazy", - "start_offset" : 45, - "end_offset" : 49, - "type" : "", - "position" : 8 - }, - { - "token" : "dog", - "start_offset" : 50, - "end_offset" : 55, - "type" : "", - "position" : 9 - }, - { - "token" : "bone", - "start_offset" : 56, - "end_offset" : 60, - "type" : "", - "position" : 10 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-classic-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -classic token filter to configure a new -<>. 
- -[source,console] --------------------------------------------------- -PUT /classic_example -{ - "settings": { - "analysis": { - "analyzer": { - "classic_analyzer": { - "tokenizer": "classic", - "filter": [ "classic" ] - } - } - } - } -} --------------------------------------------------- - diff --git a/docs/reference/analysis/tokenfilters/common-grams-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/common-grams-tokenfilter.asciidoc deleted file mode 100644 index 4913df9290c47..0000000000000 --- a/docs/reference/analysis/tokenfilters/common-grams-tokenfilter.asciidoc +++ /dev/null @@ -1,228 +0,0 @@ -[[analysis-common-grams-tokenfilter]] -=== Common grams token filter -++++ -Common grams -++++ - -Generates {wikipedia}/Bigram[bigrams] for a specified set of -common words. - -For example, you can specify `is` and `the` as common words. This filter then -converts the tokens `[the, quick, fox, is, brown]` to `[the, the_quick, quick, -fox, fox_is, is, is_brown, brown]`. - -You can use the `common_grams` filter in place of the -<> when you don't want to -completely ignore common words. - -This filter uses Lucene's -{lucene-analysis-docs}/commongrams/CommonGramsFilter.html[CommonGramsFilter]. - -[[analysis-common-grams-analyze-ex]] -==== Example - -The following <> request creates bigrams for `is` -and `the`: - -[source,console] --------------------------------------------------- -GET /_analyze -{ - "tokenizer" : "whitespace", - "filter" : [ - { - "type": "common_grams", - "common_words": ["is", "the"] - } - ], - "text" : "the quick fox is brown" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ the, the_quick, quick, fox, fox_is, is, is_brown, brown ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "the", - "start_offset" : 0, - "end_offset" : 3, - "type" : "word", - "position" : 0 - }, - { - "token" : "the_quick", - "start_offset" : 0, - "end_offset" : 9, - "type" : "gram", - "position" : 0, - "positionLength" : 2 - }, - { - "token" : "quick", - "start_offset" : 4, - "end_offset" : 9, - "type" : "word", - "position" : 1 - }, - { - "token" : "fox", - "start_offset" : 10, - "end_offset" : 13, - "type" : "word", - "position" : 2 - }, - { - "token" : "fox_is", - "start_offset" : 10, - "end_offset" : 16, - "type" : "gram", - "position" : 2, - "positionLength" : 2 - }, - { - "token" : "is", - "start_offset" : 14, - "end_offset" : 16, - "type" : "word", - "position" : 3 - }, - { - "token" : "is_brown", - "start_offset" : 14, - "end_offset" : 22, - "type" : "gram", - "position" : 3, - "positionLength" : 2 - }, - { - "token" : "brown", - "start_offset" : 17, - "end_offset" : 22, - "type" : "word", - "position" : 4 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-common-grams-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`common_grams` filter to configure a new -<>: - -[source,console] --------------------------------------------------- -PUT /common_grams_example -{ - "settings": { - "analysis": { - "analyzer": { - "index_grams": { - "tokenizer": "whitespace", - "filter": [ "common_grams" ] - } - }, - "filter": { - "common_grams": { - "type": "common_grams", - "common_words": [ "a", "is", "the" ] - } - } - } - } -} 
--------------------------------------------------- - -[[analysis-common-grams-tokenfilter-configure-parms]] -==== Configurable parameters - -`common_words`:: -+ --- -(Required+++*+++, array of strings) -A list of tokens. The filter generates bigrams for these tokens. - -Either this or the `common_words_path` parameter is required. --- - -`common_words_path`:: -+ --- -(Required+++*+++, string) -Path to a file containing a list of tokens. The filter generates bigrams for -these tokens. - -This path must be absolute or relative to the `config` location. The file must -be UTF-8 encoded. Each token in the file must be separated by a line break. - -Either this or the `common_words` parameter is required. --- - -`ignore_case`:: -(Optional, Boolean) -If `true`, matches for common words matching are case-insensitive. -Defaults to `false`. - -`query_mode`:: -+ --- -(Optional, Boolean) -If `true`, the filter excludes the following tokens from the output: - -* Unigrams for common words -* Unigrams for terms followed by common words - -Defaults to `false`. We recommend enabling this parameter for -<>. - -For example, you can enable this parameter and specify `is` and `the` as -common words. This filter converts the tokens `[the, quick, fox, is, brown]` to -`[the_quick, quick, fox_is, is_brown,]`. --- - -[[analysis-common-grams-tokenfilter-customize]] -==== Customize - -To customize the `common_grams` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a custom `common_grams` filter with -`ignore_case` and `query_mode` set to `true`: - -[source,console] --------------------------------------------------- -PUT /common_grams_example -{ - "settings": { - "analysis": { - "analyzer": { - "index_grams": { - "tokenizer": "whitespace", - "filter": [ "common_grams_query" ] - } - }, - "filter": { - "common_grams_query": { - "type": "common_grams", - "common_words": [ "a", "is", "the" ], - "ignore_case": true, - "query_mode": true - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/condition-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/condition-tokenfilter.asciidoc deleted file mode 100644 index 2bf3aa116984e..0000000000000 --- a/docs/reference/analysis/tokenfilters/condition-tokenfilter.asciidoc +++ /dev/null @@ -1,148 +0,0 @@ -[[analysis-condition-tokenfilter]] -=== Conditional token filter -++++ -Conditional -++++ - -Applies a set of token filters to tokens that match conditions in a provided -predicate script. - -This filter uses Lucene's -{lucene-analysis-docs}/miscellaneous/ConditionalTokenFilter.html[ConditionalTokenFilter]. - -[[analysis-condition-analyze-ex]] -==== Example - -The following <> request uses the `condition` -filter to match tokens with fewer than 5 characters in `THE QUICK BROWN FOX`. -It then applies the <> filter to -those matching tokens, converting them to lowercase. 
- -[source,console] --------------------------------------------------- -GET /_analyze -{ - "tokenizer": "standard", - "filter": [ - { - "type": "condition", - "filter": [ "lowercase" ], - "script": { - "source": "token.getTerm().length() < 5" - } - } - ], - "text": "THE QUICK BROWN FOX" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ the, QUICK, BROWN, fox ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "the", - "start_offset" : 0, - "end_offset" : 3, - "type" : "", - "position" : 0 - }, - { - "token" : "QUICK", - "start_offset" : 4, - "end_offset" : 9, - "type" : "", - "position" : 1 - }, - { - "token" : "BROWN", - "start_offset" : 10, - "end_offset" : 15, - "type" : "", - "position" : 2 - }, - { - "token" : "fox", - "start_offset" : 16, - "end_offset" : 19, - "type" : "", - "position" : 3 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-condition-tokenfilter-configure-parms]] -==== Configurable parameters - -`filter`:: -+ --- -(Required, array of token filters) -Array of token filters. If a token matches the predicate script in the `script` -parameter, these filters are applied to the token in the order provided. - -These filters can include custom token filters defined in the index mapping. --- - -`script`:: -+ --- -(Required, <>) -Predicate script used to apply token filters. If a token -matches this script, the filters in the `filter` parameter are applied to the -token. - -For valid parameters, see <>. Only inline scripts are -supported. Painless scripts are executed in the -{painless}/painless-analysis-predicate-context.html[analysis predicate context] -and require a `token` property. --- - -[[analysis-condition-tokenfilter-customize]] -==== Customize and add to an analyzer - -To customize the `condition` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following <> request -uses a custom `condition` filter to configure a new -<>. The custom `condition` filter -matches the first token in a stream. It then reverses that matching token using -the <> filter. - -[source,console] --------------------------------------------------- -PUT /palindrome_list -{ - "settings": { - "analysis": { - "analyzer": { - "whitespace_reverse_first_token": { - "tokenizer": "whitespace", - "filter": [ "reverse_first_token" ] - } - }, - "filter": { - "reverse_first_token": { - "type": "condition", - "filter": [ "reverse" ], - "script": { - "source": "token.getPosition() === 0" - } - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/decimal-digit-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/decimal-digit-tokenfilter.asciidoc deleted file mode 100644 index 6436bad8ac3f1..0000000000000 --- a/docs/reference/analysis/tokenfilters/decimal-digit-tokenfilter.asciidoc +++ /dev/null @@ -1,89 +0,0 @@ -[[analysis-decimal-digit-tokenfilter]] -=== Decimal digit token filter -++++ -Decimal digit -++++ - -Converts all digits in the Unicode `Decimal_Number` General Category to `0-9`. -For example, the filter changes the Bengali numeral `৩` to `3`. 
- -This filter uses Lucene's -{lucene-analysis-docs}/core/DecimalDigitFilter.html[DecimalDigitFilter]. - -[[analysis-decimal-digit-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `decimal_digit` -filter to convert Devanagari numerals to `0-9`: - -[source,console] --------------------------------------------------- -GET /_analyze -{ - "tokenizer" : "whitespace", - "filter" : ["decimal_digit"], - "text" : "१-one two-२ ३" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ 1-one, two-2, 3] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "1-one", - "start_offset" : 0, - "end_offset" : 5, - "type" : "word", - "position" : 0 - }, - { - "token" : "two-2", - "start_offset" : 6, - "end_offset" : 11, - "type" : "word", - "position" : 1 - }, - { - "token" : "3", - "start_offset" : 12, - "end_offset" : 13, - "type" : "word", - "position" : 2 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-decimal-digit-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`decimal_digit` filter to configure a new -<>. - -[source,console] --------------------------------------------------- -PUT /decimal_digit_example -{ - "settings": { - "analysis": { - "analyzer": { - "whitespace_decimal_digit": { - "tokenizer": "whitespace", - "filter": [ "decimal_digit" ] - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/delimited-payload-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/delimited-payload-tokenfilter.asciidoc deleted file mode 100644 index b775f2995f368..0000000000000 --- a/docs/reference/analysis/tokenfilters/delimited-payload-tokenfilter.asciidoc +++ /dev/null @@ -1,323 +0,0 @@ -[[analysis-delimited-payload-tokenfilter]] -=== Delimited payload token filter -++++ -Delimited payload -++++ - -[WARNING] -==== -The older name `delimited_payload_filter` is deprecated and should not be used -with new indices. Use `delimited_payload` instead. -==== - -Separates a token stream into tokens and payloads based on a specified -delimiter. - -For example, you can use the `delimited_payload` filter with a `|` delimiter to -split `the|1 quick|2 fox|3` into the tokens `the`, `quick`, and `fox` -with respective payloads of `1`, `2`, and `3`. - -This filter uses Lucene's -{lucene-analysis-docs}/payloads/DelimitedPayloadTokenFilter.html[DelimitedPayloadTokenFilter]. - -[NOTE] -.Payloads -==== -A payload is user-defined binary data associated with a token position and -stored as base64-encoded bytes. - -{es} does not store token payloads by default. To store payloads, you must: - -* Set the <> mapping parameter to - `with_positions_payloads` or `with_positions_offsets_payloads` for any field - storing payloads. -* Use an index analyzer that includes the `delimited_payload` filter - -You can view stored payloads using the <>. -==== - -[[analysis-delimited-payload-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the -`delimited_payload` filter with the default `|` delimiter to split -`the|0 brown|10 fox|5 is|0 quick|10` into tokens and payloads. 
- -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer": "whitespace", - "filter": ["delimited_payload"], - "text": "the|0 brown|10 fox|5 is|0 quick|10" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ the, brown, fox, is, quick ] --------------------------------------------------- - -Note that the analyze API does not return stored payloads. For an example that -includes returned payloads, see -<>. - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens": [ - { - "token": "the", - "start_offset": 0, - "end_offset": 5, - "type": "word", - "position": 0 - }, - { - "token": "brown", - "start_offset": 6, - "end_offset": 14, - "type": "word", - "position": 1 - }, - { - "token": "fox", - "start_offset": 15, - "end_offset": 20, - "type": "word", - "position": 2 - }, - { - "token": "is", - "start_offset": 21, - "end_offset": 25, - "type": "word", - "position": 3 - }, - { - "token": "quick", - "start_offset": 26, - "end_offset": 34, - "type": "word", - "position": 4 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-delimited-payload-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`delimited-payload` filter to configure a new <>. - -[source,console] --------------------------------------------------- -PUT delimited_payload -{ - "settings": { - "analysis": { - "analyzer": { - "whitespace_delimited_payload": { - "tokenizer": "whitespace", - "filter": [ "delimited_payload" ] - } - } - } - } -} --------------------------------------------------- - -[[analysis-delimited-payload-tokenfilter-configure-parms]] -==== Configurable parameters - -`delimiter`:: -(Optional, string) -Character used to separate tokens from payloads. Defaults to `|`. - -`encoding`:: -+ --- -(Optional, string) -Data type for the stored payload. Valid values are: - -`float`::: -(Default) Float - -`identity`::: -Characters - -`int`::: -Integer --- - -[[analysis-delimited-payload-tokenfilter-customize]] -==== Customize and add to an analyzer - -To customize the `delimited_payload` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following <> request -uses a custom `delimited_payload` filter to configure a new -<>. The custom `delimited_payload` -filter uses the `+` delimiter to separate tokens from payloads. Payloads are -encoded as integers. - -[source,console] --------------------------------------------------- -PUT delimited_payload_example -{ - "settings": { - "analysis": { - "analyzer": { - "whitespace_plus_delimited": { - "tokenizer": "whitespace", - "filter": [ "plus_delimited" ] - } - }, - "filter": { - "plus_delimited": { - "type": "delimited_payload", - "delimiter": "+", - "encoding": "int" - } - } - } - } -} --------------------------------------------------- - -[[analysis-delimited-payload-tokenfilter-return-stored-payloads]] -==== Return stored payloads - -Use the <> to create an index that: - -* Includes a field that stores term vectors with payloads. -* Uses a <> with the - `delimited_payload` filter. 
- -[source,console] --------------------------------------------------- -PUT text_payloads -{ - "mappings": { - "properties": { - "text": { - "type": "text", - "term_vector": "with_positions_payloads", - "analyzer": "payload_delimiter" - } - } - }, - "settings": { - "analysis": { - "analyzer": { - "payload_delimiter": { - "tokenizer": "whitespace", - "filter": [ "delimited_payload" ] - } - } - } - } -} --------------------------------------------------- - -Add a document containing payloads to the index. - -[source,console] --------------------------------------------------- -POST text_payloads/_doc/1 -{ - "text": "the|0 brown|3 fox|4 is|0 quick|10" -} --------------------------------------------------- -// TEST[continued] - -Use the <> to return the document's tokens -and base64-encoded payloads. - -[source,console] --------------------------------------------------- -GET text_payloads/_termvectors/1 -{ - "fields": [ "text" ], - "payloads": true -} --------------------------------------------------- -// TEST[continued] - -The API returns the following response: - -[source,console-result] --------------------------------------------------- -{ - "_index": "text_payloads", - "_id": "1", - "_version": 1, - "found": true, - "took": 8, - "term_vectors": { - "text": { - "field_statistics": { - "sum_doc_freq": 5, - "doc_count": 1, - "sum_ttf": 5 - }, - "terms": { - "brown": { - "term_freq": 1, - "tokens": [ - { - "position": 1, - "payload": "QEAAAA==" - } - ] - }, - "fox": { - "term_freq": 1, - "tokens": [ - { - "position": 2, - "payload": "QIAAAA==" - } - ] - }, - "is": { - "term_freq": 1, - "tokens": [ - { - "position": 3, - "payload": "AAAAAA==" - } - ] - }, - "quick": { - "term_freq": 1, - "tokens": [ - { - "position": 4, - "payload": "QSAAAA==" - } - ] - }, - "the": { - "term_freq": 1, - "tokens": [ - { - "position": 0, - "payload": "AAAAAA==" - } - ] - } - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 8/"took": "$body.took"/] diff --git a/docs/reference/analysis/tokenfilters/dictionary-decompounder-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/dictionary-decompounder-tokenfilter.asciidoc deleted file mode 100644 index 0e3c5804f261c..0000000000000 --- a/docs/reference/analysis/tokenfilters/dictionary-decompounder-tokenfilter.asciidoc +++ /dev/null @@ -1,173 +0,0 @@ -[[analysis-dict-decomp-tokenfilter]] -=== Dictionary decompounder token filter -++++ -Dictionary decompounder -++++ - -[NOTE] -==== -In most cases, we recommend using the faster -<> token filter -in place of this filter. However, you can use the -`dictionary_decompounder` filter to check the quality of a word list before -implementing it in the `hyphenation_decompounder` filter. -==== - -Uses a specified list of words and a brute force approach to find subwords in -compound words. If found, these subwords are included in the token output. - -This filter uses Lucene's -{lucene-analysis-docs}/compound/DictionaryCompoundWordTokenFilter.html[DictionaryCompoundWordTokenFilter], -which was built for Germanic languages. - -[[analysis-dict-decomp-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the -`dictionary_decompounder` filter to find subwords in `Donaudampfschiff`. The -filter then checks these subwords against the specified list of words: `Donau`, -`dampf`, `meer`, and `schiff`. 
- -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer": "standard", - "filter": [ - { - "type": "dictionary_decompounder", - "word_list": ["Donau", "dampf", "meer", "schiff"] - } - ], - "text": "Donaudampfschiff" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ Donaudampfschiff, Donau, dampf, schiff ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "Donaudampfschiff", - "start_offset" : 0, - "end_offset" : 16, - "type" : "", - "position" : 0 - }, - { - "token" : "Donau", - "start_offset" : 0, - "end_offset" : 16, - "type" : "", - "position" : 0 - }, - { - "token" : "dampf", - "start_offset" : 0, - "end_offset" : 16, - "type" : "", - "position" : 0 - }, - { - "token" : "schiff", - "start_offset" : 0, - "end_offset" : 16, - "type" : "", - "position" : 0 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-dict-decomp-tokenfilter-configure-parms]] -==== Configurable parameters - -`word_list`:: -+ --- -(Required+++*+++, array of strings) -A list of subwords to look for in the token stream. If found, the subword is -included in the token output. - -Either this parameter or `word_list_path` must be specified. --- - -`word_list_path`:: -+ --- -(Required+++*+++, string) -Path to a file that contains a list of subwords to find in the token stream. If -found, the subword is included in the token output. - -This path must be absolute or relative to the `config` location, and the file -must be UTF-8 encoded. Each token in the file must be separated by a line break. - -Either this parameter or `word_list` must be specified. --- - -`max_subword_size`:: -(Optional, integer) -Maximum subword character length. Longer subword tokens are excluded from the -output. Defaults to `15`. - -`min_subword_size`:: -(Optional, integer) -Minimum subword character length. Shorter subword tokens are excluded from the -output. Defaults to `2`. - -`min_word_size`:: -(Optional, integer) -Minimum word character length. Shorter word tokens are excluded from the -output. Defaults to `5`. - -`only_longest_match`:: -(Optional, Boolean) -If `true`, only include the longest matching subword. Defaults to `false`. - -[[analysis-dict-decomp-tokenfilter-customize]] -==== Customize and add to an analyzer - -To customize the `dictionary_decompounder` filter, duplicate it to create the -basis for a new custom token filter. You can modify the filter using its -configurable parameters. - -For example, the following <> request -uses a custom `dictionary_decompounder` filter to configure a new -<>. - -The custom `dictionary_decompounder` filter find subwords in the -`analysis/example_word_list.txt` file. Subwords longer than 22 characters are -excluded from the token output. 
- -[source,console] --------------------------------------------------- -PUT dictionary_decompound_example -{ - "settings": { - "analysis": { - "analyzer": { - "standard_dictionary_decompound": { - "tokenizer": "standard", - "filter": [ "22_char_dictionary_decompound" ] - } - }, - "filter": { - "22_char_dictionary_decompound": { - "type": "dictionary_decompounder", - "word_list_path": "analysis/example_word_list.txt", - "max_subword_size": 22 - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc deleted file mode 100644 index 97064adf19252..0000000000000 --- a/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc +++ /dev/null @@ -1,244 +0,0 @@ -[[analysis-edgengram-tokenfilter]] -=== Edge n-gram token filter -++++ -Edge n-gram -++++ - -Forms an {wikipedia}/N-gram[n-gram] of a specified length from -the beginning of a token. - -For example, you can use the `edge_ngram` token filter to change `quick` to -`qu`. - -When not customized, the filter creates 1-character edge n-grams by default. - -This filter uses Lucene's -{lucene-analysis-docs}/ngram/EdgeNGramTokenFilter.html[EdgeNGramTokenFilter]. - -[NOTE] -==== -The `edge_ngram` filter is similar to the <>. However, the `edge_ngram` only outputs n-grams that start at the -beginning of a token. These edge n-grams are useful for -<> queries. -==== - -[[analysis-edgengram-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `edge_ngram` -filter to convert `the quick brown fox jumps` to 1-character and 2-character -edge n-grams: - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer": "standard", - "filter": [ - { "type": "edge_ngram", - "min_gram": 1, - "max_gram": 2 - } - ], - "text": "the quick brown fox jumps" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ t, th, q, qu, b, br, f, fo, j, ju ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "t", - "start_offset" : 0, - "end_offset" : 3, - "type" : "", - "position" : 0 - }, - { - "token" : "th", - "start_offset" : 0, - "end_offset" : 3, - "type" : "", - "position" : 0 - }, - { - "token" : "q", - "start_offset" : 4, - "end_offset" : 9, - "type" : "", - "position" : 1 - }, - { - "token" : "qu", - "start_offset" : 4, - "end_offset" : 9, - "type" : "", - "position" : 1 - }, - { - "token" : "b", - "start_offset" : 10, - "end_offset" : 15, - "type" : "", - "position" : 2 - }, - { - "token" : "br", - "start_offset" : 10, - "end_offset" : 15, - "type" : "", - "position" : 2 - }, - { - "token" : "f", - "start_offset" : 16, - "end_offset" : 19, - "type" : "", - "position" : 3 - }, - { - "token" : "fo", - "start_offset" : 16, - "end_offset" : 19, - "type" : "", - "position" : 3 - }, - { - "token" : "j", - "start_offset" : 20, - "end_offset" : 25, - "type" : "", - "position" : 4 - }, - { - "token" : "ju", - "start_offset" : 20, - "end_offset" : 25, - "type" : "", - "position" : 4 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-edgengram-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`edge_ngram` filter to 
configure a new -<>. - -[source,console] --------------------------------------------------- -PUT edge_ngram_example -{ - "settings": { - "analysis": { - "analyzer": { - "standard_edge_ngram": { - "tokenizer": "standard", - "filter": [ "edge_ngram" ] - } - } - } - } -} --------------------------------------------------- - -[[analysis-edgengram-tokenfilter-configure-parms]] -==== Configurable parameters - -`max_gram`:: -+ --- -(Optional, integer) -Maximum character length of a gram. For custom token filters, defaults to `2`. -For the built-in `edge_ngram` filter, defaults to `1`. - -See <>. --- - -`min_gram`:: -(Optional, integer) -Minimum character length of a gram. Defaults to `1`. - -`preserve_original`:: -(Optional, Boolean) -Emits original token when set to `true`. Defaults to `false`. - -`side`:: -+ --- -(Optional, string) -deprecated:[8.16.0, use <> token filter before and after `edge_ngram` for same results]. -Indicates whether to truncate tokens from the `front` or `back`. Defaults to `front`. --- - -[[analysis-edgengram-tokenfilter-customize]] -==== Customize - -To customize the `edge_ngram` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a custom `edge_ngram` -filter that forms n-grams between 3-5 characters. - -[source,console] --------------------------------------------------- -PUT edge_ngram_custom_example -{ - "settings": { - "analysis": { - "analyzer": { - "default": { - "tokenizer": "whitespace", - "filter": [ "3_5_edgegrams" ] - } - }, - "filter": { - "3_5_edgegrams": { - "type": "edge_ngram", - "min_gram": 3, - "max_gram": 5 - } - } - } - } -} --------------------------------------------------- - -[[analysis-edgengram-tokenfilter-max-gram-limits]] -==== Limitations of the `max_gram` parameter - -The `edge_ngram` filter's `max_gram` value limits the character length of -tokens. When the `edge_ngram` filter is used with an index analyzer, this -means search terms longer than the `max_gram` length may not match any indexed -terms. - -For example, if the `max_gram` is `3`, searches for `apple` won't match the -indexed term `app`. - -To account for this, you can use the -<> filter with a search analyzer -to shorten search terms to the `max_gram` character length. However, this could -return irrelevant results. - -For example, if the `max_gram` is `3` and search terms are truncated to three -characters, the search term `apple` is shortened to `app`. This means searches -for `apple` return any indexed terms matching `app`, such as `apply`, `snapped`, -and `apple`. - -We recommend testing both approaches to see which best fits your -use case and desired search experience. diff --git a/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc deleted file mode 100644 index a1e9a5d9ea151..0000000000000 --- a/docs/reference/analysis/tokenfilters/elision-tokenfilter.asciidoc +++ /dev/null @@ -1,186 +0,0 @@ -[[analysis-elision-tokenfilter]] -=== Elision token filter -++++ -Elision -++++ - -Removes specified {wikipedia}/Elision[elisions] from -the beginning of tokens. For example, you can use this filter to change -`l'avion` to `avion`. 
- -When not customized, the filter removes the following French elisions by default: - -`l'`, `m'`, `t'`, `qu'`, `n'`, `s'`, `j'`, `d'`, `c'`, `jusqu'`, `quoiqu'`, -`lorsqu'`, `puisqu'` - -Customized versions of this filter are included in several of {es}'s built-in -<>: - -* <> -* <> -* <> -* <> - -This filter uses Lucene's -{lucene-analysis-docs}/util/ElisionFilter.html[ElisionFilter]. - -[[analysis-elision-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `elision` -filter to remove `j'` from `j’examine près du wharf`: - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer" : "standard", - "filter" : ["elision"], - "text" : "j’examine près du wharf" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ examine, près, du, wharf ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "examine", - "start_offset" : 0, - "end_offset" : 9, - "type" : "", - "position" : 0 - }, - { - "token" : "près", - "start_offset" : 10, - "end_offset" : 14, - "type" : "", - "position" : 1 - }, - { - "token" : "du", - "start_offset" : 15, - "end_offset" : 17, - "type" : "", - "position" : 2 - }, - { - "token" : "wharf", - "start_offset" : 18, - "end_offset" : 23, - "type" : "", - "position" : 3 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-elision-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`elision` filter to configure a new -<>. - -[source,console] --------------------------------------------------- -PUT /elision_example -{ - "settings": { - "analysis": { - "analyzer": { - "whitespace_elision": { - "tokenizer": "whitespace", - "filter": [ "elision" ] - } - } - } - } -} --------------------------------------------------- - -[[analysis-elision-tokenfilter-configure-parms]] -==== Configurable parameters - -[[analysis-elision-tokenfilter-articles]] -`articles`:: -+ --- -(Required+++*+++, array of string) -List of elisions to remove. - -To be removed, the elision must be at the beginning of a token and be -immediately followed by an apostrophe. Both the elision and apostrophe are -removed. - -For custom `elision` filters, either this parameter or `articles_path` must be -specified. --- - -`articles_path`:: -+ --- -(Required+++*+++, string) -Path to a file that contains a list of elisions to remove. - -This path must be absolute or relative to the `config` location, and the file -must be UTF-8 encoded. Each elision in the file must be separated by a line -break. - -To be removed, the elision must be at the beginning of a token and be -immediately followed by an apostrophe. Both the elision and apostrophe are -removed. - -For custom `elision` filters, either this parameter or `articles` must be -specified. --- - -`articles_case`:: -(Optional, Boolean) -If `true`, elision matching is case insensitive. If `false`, elision matching is -case sensitive. Defaults to `false`. - -[[analysis-elision-tokenfilter-customize]] -==== Customize - -To customize the `elision` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. 
- -For example, the following request creates a custom case-insensitive `elision` -filter that removes the `l'`, `m'`, `t'`, `qu'`, `n'`, `s'`, -and `j'` elisions: - -[source,console] --------------------------------------------------- -PUT /elision_case_insensitive_example -{ - "settings": { - "analysis": { - "analyzer": { - "default": { - "tokenizer": "whitespace", - "filter": [ "elision_case_insensitive" ] - } - }, - "filter": { - "elision_case_insensitive": { - "type": "elision", - "articles": [ "l", "m", "t", "qu", "n", "s", "j" ], - "articles_case": true - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/fingerprint-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/fingerprint-tokenfilter.asciidoc deleted file mode 100644 index 78a963763516b..0000000000000 --- a/docs/reference/analysis/tokenfilters/fingerprint-tokenfilter.asciidoc +++ /dev/null @@ -1,138 +0,0 @@ -[[analysis-fingerprint-tokenfilter]] -=== Fingerprint token filter -++++ -Fingerprint -++++ - -Sorts and removes duplicate tokens from a token stream, then concatenates the -stream into a single output token. - -For example, this filter changes the `[ the, fox, was, very, very, quick ]` -token stream as follows: - -. Sorts the tokens alphabetically to `[ fox, quick, the, very, very, was ]` - -. Removes a duplicate instance of the `very` token. - -. Concatenates the token stream to a output single token: `[fox quick the very was ]` - -Output tokens produced by this filter are useful for -fingerprinting and clustering a body of text as described in the -https://github.com/OpenRefine/OpenRefine/wiki/Clustering-In-Depth#fingerprint[OpenRefine -project]. - -This filter uses Lucene's -{lucene-analysis-docs}/miscellaneous/FingerprintFilter.html[FingerprintFilter]. - -[[analysis-fingerprint-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `fingerprint` -filter to create a single output token for the text `zebra jumps over resting -resting dog`: - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer" : "whitespace", - "filter" : ["fingerprint"], - "text" : "zebra jumps over resting resting dog" -} --------------------------------------------------- - -The filter produces the following token: - -[source,text] --------------------------------------------------- -[ dog jumps over resting zebra ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "dog jumps over resting zebra", - "start_offset" : 0, - "end_offset" : 36, - "type" : "fingerprint", - "position" : 0 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-fingerprint-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`fingerprint` filter to configure a new <>. 
- -[source,console] --------------------------------------------------- -PUT fingerprint_example -{ - "settings": { - "analysis": { - "analyzer": { - "whitespace_fingerprint": { - "tokenizer": "whitespace", - "filter": [ "fingerprint" ] - } - } - } - } -} --------------------------------------------------- - -[[analysis-fingerprint-tokenfilter-configure-parms]] -==== Configurable parameters - -[[analysis-fingerprint-tokenfilter-max-size]] -`max_output_size`:: -(Optional, integer) -Maximum character length, including whitespace, of the output token. Defaults to -`255`. Concatenated tokens longer than this will result in no token output. - -`separator`:: -(Optional, string) -Character to use to concatenate the token stream input. Defaults to a space. - -[[analysis-fingerprint-tokenfilter-customize]] -==== Customize - -To customize the `fingerprint` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a custom `fingerprint` filter with -that use `+` to concatenate token streams. The filter also limits -output tokens to `100` characters or fewer. - -[source,console] --------------------------------------------------- -PUT custom_fingerprint_example -{ - "settings": { - "analysis": { - "analyzer": { - "whitespace_": { - "tokenizer": "whitespace", - "filter": [ "fingerprint_plus_concat" ] - } - }, - "filter": { - "fingerprint_plus_concat": { - "type": "fingerprint", - "max_output_size": 100, - "separator": "+" - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/flatten-graph-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/flatten-graph-tokenfilter.asciidoc deleted file mode 100644 index b719ea376a279..0000000000000 --- a/docs/reference/analysis/tokenfilters/flatten-graph-tokenfilter.asciidoc +++ /dev/null @@ -1,227 +0,0 @@ -[[analysis-flatten-graph-tokenfilter]] -=== Flatten graph token filter -++++ -Flatten graph -++++ - -Flattens a <> produced by a graph token filter, such -as <> or -<>. - -Flattening a token graph containing -<> makes the graph -suitable for <>. Otherwise, indexing does -not support token graphs containing multi-position tokens. - -[WARNING] -==== -Flattening graphs is a lossy process. - -If possible, avoid using the `flatten_graph` filter. Instead, use graph token -filters in <> only. This eliminates -the need for the `flatten_graph` filter. -==== - -The `flatten_graph` filter uses Lucene's -{lucene-analysis-docs}/core/FlattenGraphFilter.html[FlattenGraphFilter]. - -[[analysis-flatten-graph-tokenfilter-analyze-ex]] -==== Example - -To see how the `flatten_graph` filter works, you first need to produce a token -graph containing multi-position tokens. - -The following <> request uses the `synonym_graph` -filter to add `dns` as a multi-position synonym for `domain name system` in the -text `domain name system is fragile`: - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "standard", - "filter": [ - { - "type": "synonym_graph", - "synonyms": [ "dns, domain name system" ] - } - ], - "text": "domain name system is fragile" -} ----- - -The filter produces the following token graph with `dns` as a multi-position -token. 
- -image::images/analysis/token-graph-dns-synonym-ex.svg[align="center"] - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "dns", - "start_offset": 0, - "end_offset": 18, - "type": "SYNONYM", - "position": 0, - "positionLength": 3 - }, - { - "token": "domain", - "start_offset": 0, - "end_offset": 6, - "type": "", - "position": 0 - }, - { - "token": "name", - "start_offset": 7, - "end_offset": 11, - "type": "", - "position": 1 - }, - { - "token": "system", - "start_offset": 12, - "end_offset": 18, - "type": "", - "position": 2 - }, - { - "token": "is", - "start_offset": 19, - "end_offset": 21, - "type": "", - "position": 3 - }, - { - "token": "fragile", - "start_offset": 22, - "end_offset": 29, - "type": "", - "position": 4 - } - ] -} ----- -//// - -Indexing does not support token graphs containing multi-position tokens. To make -this token graph suitable for indexing, it needs to be flattened. - -To flatten the token graph, add the `flatten_graph` filter after the -`synonym_graph` filter in the previous analyze API request. - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "standard", - "filter": [ - { - "type": "synonym_graph", - "synonyms": [ "dns, domain name system" ] - }, - "flatten_graph" - ], - "text": "domain name system is fragile" -} ----- - -The filter produces the following flattened token graph, which is suitable for -indexing. - -image::images/analysis/token-graph-dns-invalid-ex.svg[align="center"] - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "dns", - "start_offset": 0, - "end_offset": 18, - "type": "SYNONYM", - "position": 0, - "positionLength": 3 - }, - { - "token": "domain", - "start_offset": 0, - "end_offset": 6, - "type": "", - "position": 0 - }, - { - "token": "name", - "start_offset": 7, - "end_offset": 11, - "type": "", - "position": 1 - }, - { - "token": "system", - "start_offset": 12, - "end_offset": 18, - "type": "", - "position": 2 - }, - { - "token": "is", - "start_offset": 19, - "end_offset": 21, - "type": "", - "position": 3 - }, - { - "token": "fragile", - "start_offset": 22, - "end_offset": 29, - "type": "", - "position": 4 - } - ] -} ----- -//// - -[[analysis-keyword-marker-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`flatten_graph` token filter to configure a new -<>. - -In this analyzer, a custom `word_delimiter_graph` filter produces token graphs -containing catenated, multi-position tokens. The `flatten_graph` filter flattens -these token graphs, making them suitable for indexing. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_custom_index_analyzer": { - "type": "custom", - "tokenizer": "standard", - "filter": [ - "my_custom_word_delimiter_graph_filter", - "flatten_graph" - ] - } - }, - "filter": { - "my_custom_word_delimiter_graph_filter": { - "type": "word_delimiter_graph", - "catenate_all": true - } - } - } - } -} ----- \ No newline at end of file diff --git a/docs/reference/analysis/tokenfilters/hunspell-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/hunspell-tokenfilter.asciidoc deleted file mode 100644 index ca8d698490b87..0000000000000 --- a/docs/reference/analysis/tokenfilters/hunspell-tokenfilter.asciidoc +++ /dev/null @@ -1,234 +0,0 @@ -[[analysis-hunspell-tokenfilter]] -=== Hunspell token filter -++++ -Hunspell -++++ - -Provides <> based on a provided -{wikipedia}/Hunspell[Hunspell dictionary]. 
The `hunspell` -filter requires -<> of one or more -language-specific Hunspell dictionaries. - -This filter uses Lucene's -{lucene-analysis-docs}/hunspell/HunspellStemFilter.html[HunspellStemFilter]. - -[TIP] -==== -If available, we recommend trying an algorithmic stemmer for your language -before using the <> token filter. -In practice, algorithmic stemmers typically outperform dictionary stemmers. -See <>. -==== - -[[analysis-hunspell-tokenfilter-dictionary-config]] -==== Configure Hunspell dictionaries - -Hunspell dictionaries are stored and detected on a dedicated -`hunspell` directory on the filesystem: `<$ES_PATH_CONF>/hunspell`. Each dictionary -is expected to have its own directory, named after its associated language and -locale (e.g., `pt_BR`, `en_GB`). This dictionary directory is expected to hold a -single `.aff` and one or more `.dic` files, all of which will automatically be -picked up. For example, the following directory layout will define the `en_US` dictionary: - -[source,txt] --------------------------------------------------- -- config - |-- hunspell - | |-- en_US - | | |-- en_US.dic - | | |-- en_US.aff --------------------------------------------------- - -Each dictionary can be configured with one setting: - -[[analysis-hunspell-ignore-case-settings]] -`ignore_case`:: -(Static, Boolean) -If true, dictionary matching will be case insensitive. Defaults to `false`. -+ -This setting can be configured globally in `elasticsearch.yml` using -`indices.analysis.hunspell.dictionary.ignore_case`. -+ -To configure the setting for a specific locale, use the -`indices.analysis.hunspell.dictionary..ignore_case` setting (e.g., for -the `en_US` (American English) locale, the setting is -`indices.analysis.hunspell.dictionary.en_US.ignore_case`). -+ -You can also add a `settings.yml` file under the dictionary -directory which holds these settings. This overrides any other `ignore_case` -settings defined in `elasticsearch.yml`. - -[[analysis-hunspell-tokenfilter-analyze-ex]] -==== Example - -The following analyze API request uses the `hunspell` filter to stem -`the foxes jumping quickly` to `the fox jump quick`. - -The request specifies the `en_US` locale, meaning that the -`.aff` and `.dic` files in the `<$ES_PATH_CONF>/hunspell/en_US` directory are used -for the Hunspell dictionary. - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "standard", - "filter": [ - { - "type": "hunspell", - "locale": "en_US" - } - ], - "text": "the foxes jumping quickly" -} ----- - -The filter produces the following tokens: - -[source,text] ----- -[ the, fox, jump, quick ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "the", - "start_offset": 0, - "end_offset": 3, - "type": "", - "position": 0 - }, - { - "token": "fox", - "start_offset": 4, - "end_offset": 9, - "type": "", - "position": 1 - }, - { - "token": "jump", - "start_offset": 10, - "end_offset": 17, - "type": "", - "position": 2 - }, - { - "token": "quick", - "start_offset": 18, - "end_offset": 25, - "type": "", - "position": 3 - } - ] -} ----- -//// - -[[analysis-hunspell-tokenfilter-configure-parms]] -==== Configurable parameters - -[[analysis-hunspell-tokenfilter-dictionary-param]] -`dictionary`:: -(Optional, string or array of strings) -One or more `.dic` files (e.g, `en_US.dic, my_custom.dic`) to use for the -Hunspell dictionary. -+ -By default, the `hunspell` filter uses all `.dic` files in the -`<$ES_PATH_CONF>/hunspell/` directory specified using the -`lang`, `language`, or `locale` parameter. 
- -`dedup`:: -(Optional, Boolean) -If `true`, duplicate tokens are removed from the filter's output. Defaults to -`true`. - -`lang`:: -(Required*, string) -An alias for the <>. -+ -If this parameter is not specified, the `language` or `locale` parameter is -required. - -`language`:: -(Required*, string) -An alias for the <>. -+ -If this parameter is not specified, the `lang` or `locale` parameter is -required. - -[[analysis-hunspell-tokenfilter-locale-param]] -`locale`:: -(Required*, string) -Locale directory used to specify the `.aff` and `.dic` files for a Hunspell -dictionary. See <>. -+ -If this parameter is not specified, the `lang` or `language` parameter is -required. - -`longest_only`:: -(Optional, Boolean) -If `true`, only the longest stemmed version of each token is -included in the output. If `false`, all stemmed versions of the token are -included. Defaults to `false`. - -[[analysis-hunspell-tokenfilter-analyzer-ex]] -==== Customize and add to an analyzer - -To customize the `hunspell` filter, duplicate it to create the -basis for a new custom token filter. You can modify the filter using its -configurable parameters. - -For example, the following <> request -uses a custom `hunspell` filter, `my_en_US_dict_stemmer`, to configure a new -<>. - -The `my_en_US_dict_stemmer` filter uses a `locale` of `en_US`, meaning that the -`.aff` and `.dic` files in the `<$ES_PATH_CONF>/hunspell/en_US` directory are -used. The filter also includes a `dedup` argument of `false`, meaning that -duplicate tokens added from the dictionary are not removed from the filter's -output. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "en": { - "tokenizer": "standard", - "filter": [ "my_en_US_dict_stemmer" ] - } - }, - "filter": { - "my_en_US_dict_stemmer": { - "type": "hunspell", - "locale": "en_US", - "dedup": false - } - } - } - } -} ----- - -[[analysis-hunspell-tokenfilter-settings]] -==== Settings - -In addition to the <>, you can configure the following global settings for the `hunspell` -filter using `elasticsearch.yml`: - -`indices.analysis.hunspell.dictionary.lazy`:: -(Static, Boolean) -If `true`, the loading of Hunspell dictionaries is deferred until a dictionary -is used. If `false`, the dictionary directory is checked for dictionaries when -the node starts, and any dictionaries are automatically loaded. Defaults to -`false`. diff --git a/docs/reference/analysis/tokenfilters/hyphenation-decompounder-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/hyphenation-decompounder-tokenfilter.asciidoc deleted file mode 100644 index 1bd36f801aa17..0000000000000 --- a/docs/reference/analysis/tokenfilters/hyphenation-decompounder-tokenfilter.asciidoc +++ /dev/null @@ -1,166 +0,0 @@ -[[analysis-hyp-decomp-tokenfilter]] -=== Hyphenation decompounder token filter -++++ -Hyphenation decompounder -++++ - -Uses XML-based hyphenation patterns to find potential subwords in compound -words. These subwords are then checked against the specified word list. Subwords not -in the list are excluded from the token output. - -This filter uses Lucene's -{lucene-analysis-docs}/compound/HyphenationCompoundWordTokenFilter.html[HyphenationCompoundWordTokenFilter], -which was built for Germanic languages. - -[[analysis-hyp-decomp-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the -`hyphenation_decompounder` filter to find subwords in `Kaffeetasse` based on -German hyphenation patterns in the `analysis/hyphenation_patterns.xml` file. 
The -filter then checks these subwords against a list of specified words: `kaffee`, -`zucker`, and `tasse`. - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer": "standard", - "filter": [ - { - "type": "hyphenation_decompounder", - "hyphenation_patterns_path": "analysis/hyphenation_patterns.xml", - "word_list": ["Kaffee", "zucker", "tasse"] - } - ], - "text": "Kaffeetasse" -} --------------------------------------------------- -// TEST[skip: requires a valid hyphenation_patterns.xml file for DE-DR] - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ Kaffeetasse, Kaffee, tasse ] --------------------------------------------------- - -[[analysis-hyp-decomp-tokenfilter-configure-parms]] -==== Configurable parameters - -`hyphenation_patterns_path`:: -+ --- -(Required, string) -Path to an Apache FOP (Formatting Objects Processor) XML hyphenation pattern file. - -This path must be absolute or relative to the `config` location. Only FOP v1.2 -compatible files are supported. - -For example FOP XML hyphenation pattern files, refer to: - -* http://offo.sourceforge.net/#FOP+XML+Hyphenation+Patterns[Objects For Formatting Objects (OFFO) Sourceforge project] -* https://sourceforge.net/projects/offo/files/offo-hyphenation/1.2/offo-hyphenation_v1.2.zip/download[offo-hyphenation_v1.2.zip direct download] (v2.0 and above hyphenation pattern files are not supported) --- - -`word_list`:: -+ --- -(Required+++*+++, array of strings) -A list of subwords. Subwords found using the hyphenation pattern but not in this -list are excluded from the token output. - -You can use the <> -filter to test the quality of word lists before implementing them. - -Either this parameter or `word_list_path` must be specified. --- - -`word_list_path`:: -+ --- -(Required+++*+++, string) -Path to a file containing a list of subwords. Subwords found using the -hyphenation pattern but not in this list are excluded from the token output. - -This path must be absolute or relative to the `config` location, and the file -must be UTF-8 encoded. Each token in the file must be separated by a line break. - -You can use the <> -filter to test the quality of word lists before implementing them. - -Either this parameter or `word_list` must be specified. --- - -`max_subword_size`:: -(Optional, integer) -Maximum subword character length. Longer subword tokens are excluded from the -output. Defaults to `15`. - -`min_subword_size`:: -(Optional, integer) -Minimum subword character length. Shorter subword tokens are excluded from the -output. Defaults to `2`. - -`min_word_size`:: -(Optional, integer) -Minimum word character length. Shorter word tokens are excluded from the -output. Defaults to `5`. - -`only_longest_match`:: -(Optional, Boolean) -If `true`, only include the longest matching subword. Defaults to `false`. - -`no_sub_matches`:: -(Optional, Boolean) -If `true`, do not match sub tokens in tokens that are in the word list. -Defaults to `false`. - -`no_overlapping_matches`:: -(Optional, Boolean) -If `true`, do not allow overlapping tokens. -Defaults to `false`. - -Typically users will only want to include one of the three flags as enabling `no_overlapping_matches` is the most restrictive and `no_sub_matches` is more restrictive than `only_longest_match`. When enabling a more restrictive option the state of the less restrictive does not have any effect. 
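-
-For example, the following request is an illustrative sketch that repeats the
-earlier `Kaffeetasse` example with `only_longest_match` enabled. It assumes the
-same `analysis/hyphenation_patterns.xml` file, and the resulting tokens depend
-on that file, so no token listing is shown here.
-
-[source,console]
---------------------------------------------------
-GET _analyze
-{
-  "tokenizer": "standard",
-  "filter": [
-    {
-      "type": "hyphenation_decompounder",
-      "hyphenation_patterns_path": "analysis/hyphenation_patterns.xml",
-      "word_list": ["Kaffee", "zucker", "tasse"],
-      "only_longest_match": true
-    }
-  ],
-  "text": "Kaffeetasse"
-}
---------------------------------------------------
-// TEST[skip: requires a valid hyphenation_patterns.xml file for DE-DR]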
- -[[analysis-hyp-decomp-tokenfilter-customize]] -==== Customize and add to an analyzer - -To customize the `hyphenation_decompounder` filter, duplicate it to create the -basis for a new custom token filter. You can modify the filter using its -configurable parameters. - -For example, the following <> request -uses a custom `hyphenation_decompounder` filter to configure a new -<>. - -The custom `hyphenation_decompounder` filter find subwords based on hyphenation -patterns in the `analysis/hyphenation_patterns.xml` file. The filter then checks -these subwords against the list of words specified in the -`analysis/example_word_list.txt` file. Subwords longer than 22 characters are -excluded from the token output. - -[source,console] --------------------------------------------------- -PUT hyphenation_decompound_example -{ - "settings": { - "analysis": { - "analyzer": { - "standard_hyphenation_decompound": { - "tokenizer": "standard", - "filter": [ "22_char_hyphenation_decompound" ] - } - }, - "filter": { - "22_char_hyphenation_decompound": { - "type": "hyphenation_decompounder", - "word_list_path": "analysis/example_word_list.txt", - "hyphenation_patterns_path": "analysis/hyphenation_patterns.xml", - "max_subword_size": 22 - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/keep-types-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/keep-types-tokenfilter.asciidoc deleted file mode 100644 index 1ad7a763edca5..0000000000000 --- a/docs/reference/analysis/tokenfilters/keep-types-tokenfilter.asciidoc +++ /dev/null @@ -1,205 +0,0 @@ -[[analysis-keep-types-tokenfilter]] -=== Keep types token filter -++++ -Keep types -++++ - -Keeps or removes tokens of a specific type. For example, you can use this filter -to change `3 quick foxes` to `quick foxes` by keeping only `` -(alphanumeric) tokens. - -[NOTE] -.Token types -==== -Token types are set by the <> when converting -characters to tokens. Token types can vary between tokenizers. - -For example, the <> tokenizer can -produce a variety of token types, including ``, ``, and -``. Simpler analyzers, like the -<> tokenizer, only produce the `word` -token type. - -Certain token filters can also add token types. For example, the -<> filter can add the `` token -type. - -Some tokenizers don't support this token filter, for example keyword, simple_pattern, and -simple_pattern_split tokenizers, as they don't support setting the token type attribute. -==== - -This filter uses Lucene's -{lucene-analysis-docs}/core/TypeTokenFilter.html[TypeTokenFilter]. - -[[analysis-keep-types-tokenfilter-analyze-include-ex]] -==== Include example - -The following <> request uses the `keep_types` -filter to keep only `` (numeric) tokens from `1 quick fox 2 lazy dogs`. 
- -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer": "standard", - "filter": [ - { - "type": "keep_types", - "types": [ "" ] - } - ], - "text": "1 quick fox 2 lazy dogs" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ 1, 2 ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens": [ - { - "token": "1", - "start_offset": 0, - "end_offset": 1, - "type": "", - "position": 0 - }, - { - "token": "2", - "start_offset": 12, - "end_offset": 13, - "type": "", - "position": 3 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-keep-types-tokenfilter-analyze-exclude-ex]] -==== Exclude example - -The following <> request uses the `keep_types` -filter to remove `` tokens from `1 quick fox 2 lazy dogs`. Note the `mode` -parameter is set to `exclude`. - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer": "standard", - "filter": [ - { - "type": "keep_types", - "types": [ "" ], - "mode": "exclude" - } - ], - "text": "1 quick fox 2 lazy dogs" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ quick, fox, lazy, dogs ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens": [ - { - "token": "quick", - "start_offset": 2, - "end_offset": 7, - "type": "", - "position": 1 - }, - { - "token": "fox", - "start_offset": 8, - "end_offset": 11, - "type": "", - "position": 2 - }, - { - "token": "lazy", - "start_offset": 14, - "end_offset": 18, - "type": "", - "position": 4 - }, - { - "token": "dogs", - "start_offset": 19, - "end_offset": 23, - "type": "", - "position": 5 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-keep-types-tokenfilter-configure-parms]] -==== Configurable parameters - -`types`:: -(Required, array of strings) -List of token types to keep or remove. - -`mode`:: -(Optional, string) -Indicates whether to keep or remove the specified token types. -Valid values are: - -`include`::: -(Default) Keep only the specified token types. - -`exclude`::: -Remove the specified token types. - -[[analysis-keep-types-tokenfilter-customize]] -==== Customize and add to an analyzer - -To customize the `keep_types` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following <> request -uses a custom `keep_types` filter to configure a new -<>. The custom `keep_types` filter -keeps only `` (alphanumeric) tokens. 
- -[source,console] --------------------------------------------------- -PUT keep_types_example -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "standard", - "filter": [ "extract_alpha" ] - } - }, - "filter": { - "extract_alpha": { - "type": "keep_types", - "types": [ "" ] - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/keep-words-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/keep-words-tokenfilter.asciidoc deleted file mode 100644 index a0a9c08bf998e..0000000000000 --- a/docs/reference/analysis/tokenfilters/keep-words-tokenfilter.asciidoc +++ /dev/null @@ -1,146 +0,0 @@ -[[analysis-keep-words-tokenfilter]] -=== Keep words token filter -++++ -Keep words -++++ - -Keeps only tokens contained in a specified word list. - -This filter uses Lucene's -{lucene-analysis-docs}/miscellaneous/KeepWordFilter.html[KeepWordFilter]. - -[NOTE] -==== -To remove a list of words from a token stream, use the -<> filter. -==== - -[[analysis-keep-words-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `keep` filter to -keep only the `fox` and `dog` tokens from -`the quick fox jumps over the lazy dog`. - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer": "whitespace", - "filter": [ - { - "type": "keep", - "keep_words": [ "dog", "elephant", "fox" ] - } - ], - "text": "the quick fox jumps over the lazy dog" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ fox, dog ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens": [ - { - "token": "fox", - "start_offset": 10, - "end_offset": 13, - "type": "word", - "position": 2 - }, - { - "token": "dog", - "start_offset": 34, - "end_offset": 37, - "type": "word", - "position": 7 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-keep-words-tokenfilter-configure-parms]] -==== Configurable parameters - -`keep_words`:: -+ --- -(Required+++*+++, array of strings) -List of words to keep. Only tokens that match words in this list are included in -the output. - -Either this parameter or `keep_words_path` must be specified. --- - -`keep_words_path`:: -+ --- -(Required+++*+++, array of strings) -Path to a file that contains a list of words to keep. Only tokens that match -words in this list are included in the output. - -This path must be absolute or relative to the `config` location, and the file -must be UTF-8 encoded. Each word in the file must be separated by a line break. - -Either this parameter or `keep_words` must be specified. --- - -`keep_words_case`:: -(Optional, Boolean) -If `true`, lowercase all keep words. Defaults to `false`. - -[[analysis-keep-words-tokenfilter-customize]] -==== Customize and add to an analyzer - -To customize the `keep` filter, duplicate it to create the basis for a new -custom token filter. You can modify the filter using its configurable -parameters. 
- -For example, the following <> request -uses custom `keep` filters to configure two new -<>: - -* `standard_keep_word_array`, which uses a custom `keep` filter with an inline - array of keep words -* `standard_keep_word_file`, which uses a customer `keep` filter with a keep - words file - -[source,console] --------------------------------------------------- -PUT keep_words_example -{ - "settings": { - "analysis": { - "analyzer": { - "standard_keep_word_array": { - "tokenizer": "standard", - "filter": [ "keep_word_array" ] - }, - "standard_keep_word_file": { - "tokenizer": "standard", - "filter": [ "keep_word_file" ] - } - }, - "filter": { - "keep_word_array": { - "type": "keep", - "keep_words": [ "one", "two", "three" ] - }, - "keep_word_file": { - "type": "keep", - "keep_words_path": "analysis/example_word_list.txt" - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/keyword-marker-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/keyword-marker-tokenfilter.asciidoc deleted file mode 100644 index aab546326a90a..0000000000000 --- a/docs/reference/analysis/tokenfilters/keyword-marker-tokenfilter.asciidoc +++ /dev/null @@ -1,389 +0,0 @@ -[[analysis-keyword-marker-tokenfilter]] -=== Keyword marker token filter -++++ -Keyword marker -++++ - -Marks specified tokens as keywords, which are not stemmed. - -The `keyword_marker` filter assigns specified tokens a `keyword` attribute of -`true`. Stemmer token filters, such as -<> or -<>, skip tokens with a `keyword` -attribute of `true`. - -[IMPORTANT] -==== -To work properly, the `keyword_marker` filter must be listed before any stemmer -token filters in the <>. -==== - -The `keyword_marker` filter uses Lucene's -{lucene-analysis-docs}/miscellaneous/KeywordMarkerFilter.html[KeywordMarkerFilter]. - -[[analysis-keyword-marker-tokenfilter-analyze-ex]] -==== Example - -To see how the `keyword_marker` filter works, you first need to produce a token -stream containing stemmed tokens. - -The following <> request uses the -<> filter to create stemmed tokens for -`fox running and jumping`. - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "whitespace", - "filter": [ "stemmer" ], - "text": "fox running and jumping" -} ----- - -The request produces the following tokens. Note that `running` was stemmed to -`run` and `jumping` was stemmed to `jump`. - -[source,text] ----- -[ fox, run, and, jump ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "fox", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "run", - "start_offset": 4, - "end_offset": 11, - "type": "word", - "position": 1 - }, - { - "token": "and", - "start_offset": 12, - "end_offset": 15, - "type": "word", - "position": 2 - }, - { - "token": "jump", - "start_offset": 16, - "end_offset": 23, - "type": "word", - "position": 3 - } - ] -} ----- -//// - -To prevent `jumping` from being stemmed, add the `keyword_marker` filter before -the `stemmer` filter in the previous analyze API request. Specify `jumping` in -the `keywords` parameter of the `keyword_marker` filter. - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "whitespace", - "filter": [ - { - "type": "keyword_marker", - "keywords": [ "jumping" ] - }, - "stemmer" - ], - "text": "fox running and jumping" -} ----- - -The request produces the following tokens. `running` is still stemmed to `run`, -but `jumping` is not stemmed. 
- -[source,text] ----- -[ fox, run, and, jumping ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "fox", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "run", - "start_offset": 4, - "end_offset": 11, - "type": "word", - "position": 1 - }, - { - "token": "and", - "start_offset": 12, - "end_offset": 15, - "type": "word", - "position": 2 - }, - { - "token": "jumping", - "start_offset": 16, - "end_offset": 23, - "type": "word", - "position": 3 - } - ] -} ----- -//// - -To see the `keyword` attribute for these tokens, add the following arguments to -the analyze API request: - -* `explain`: `true` -* `attributes`: `keyword` - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "whitespace", - "filter": [ - { - "type": "keyword_marker", - "keywords": [ "jumping" ] - }, - "stemmer" - ], - "text": "fox running and jumping", - "explain": true, - "attributes": "keyword" -} ----- - -The API returns the following response. Note the `jumping` token has a -`keyword` attribute of `true`. - -[source,console-result] ----- -{ - "detail": { - "custom_analyzer": true, - "charfilters": [], - "tokenizer": { - "name": "whitespace", - "tokens": [ - { - "token": "fox", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "running", - "start_offset": 4, - "end_offset": 11, - "type": "word", - "position": 1 - }, - { - "token": "and", - "start_offset": 12, - "end_offset": 15, - "type": "word", - "position": 2 - }, - { - "token": "jumping", - "start_offset": 16, - "end_offset": 23, - "type": "word", - "position": 3 - } - ] - }, - "tokenfilters": [ - { - "name": "__anonymous__keyword_marker", - "tokens": [ - { - "token": "fox", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0, - "keyword": false - }, - { - "token": "running", - "start_offset": 4, - "end_offset": 11, - "type": "word", - "position": 1, - "keyword": false - }, - { - "token": "and", - "start_offset": 12, - "end_offset": 15, - "type": "word", - "position": 2, - "keyword": false - }, - { - "token": "jumping", - "start_offset": 16, - "end_offset": 23, - "type": "word", - "position": 3, - "keyword": true - } - ] - }, - { - "name": "stemmer", - "tokens": [ - { - "token": "fox", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0, - "keyword": false - }, - { - "token": "run", - "start_offset": 4, - "end_offset": 11, - "type": "word", - "position": 1, - "keyword": false - }, - { - "token": "and", - "start_offset": 12, - "end_offset": 15, - "type": "word", - "position": 2, - "keyword": false - }, - { - "token": "jumping", - "start_offset": 16, - "end_offset": 23, - "type": "word", - "position": 3, - "keyword": true - } - ] - } - ] - } -} ----- - -[[analysis-keyword-marker-tokenfilter-configure-parms]] -==== Configurable parameters - -`ignore_case`:: -(Optional, Boolean) -If `true`, matching for the `keywords` and `keywords_path` parameters ignores -letter case. Defaults to `false`. - -`keywords`:: -(Required*, array of strings) -Array of keywords. Tokens that match these keywords are not stemmed. -+ -This parameter, `keywords_path`, or `keywords_pattern` must be specified. -You cannot specify this parameter and `keywords_pattern`. - -`keywords_path`:: -+ --- -(Required*, string) -Path to a file that contains a list of keywords. Tokens that match these -keywords are not stemmed. - -This path must be absolute or relative to the `config` location, and the file -must be UTF-8 encoded. 
Each word in the file must be separated by a line break. - -This parameter, `keywords`, or `keywords_pattern` must be specified. -You cannot specify this parameter and `keywords_pattern`. --- - -`keywords_pattern`:: -+ --- -(Required*, string) -https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html[Java -regular expression] used to match tokens. Tokens that match this expression are -marked as keywords and not stemmed. - -This parameter, `keywords`, or `keywords_path` must be specified. You -cannot specify this parameter and `keywords` or `keywords_pattern`. - -[WARNING] -==== -Poorly written regular expressions can cause {es} to run slowly or result -in stack overflow errors, causing the running node to suddenly exit. -==== --- - -[[analysis-keyword-marker-tokenfilter-customize]] -==== Customize and add to an analyzer - -To customize the `keyword_marker` filter, duplicate it to create the basis for a -new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following <> request -uses a custom `keyword_marker` filter and the `porter_stem` -filter to configure a new <>. - -The custom `keyword_marker` filter marks tokens specified in the -`analysis/example_word_list.txt` file as keywords. The `porter_stem` filter does -not stem these tokens. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_custom_analyzer": { - "type": "custom", - "tokenizer": "standard", - "filter": [ - "my_custom_keyword_marker_filter", - "porter_stem" - ] - } - }, - "filter": { - "my_custom_keyword_marker_filter": { - "type": "keyword_marker", - "keywords_path": "analysis/example_word_list.txt" - } - } - } - } -} ----- diff --git a/docs/reference/analysis/tokenfilters/keyword-repeat-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/keyword-repeat-tokenfilter.asciidoc deleted file mode 100644 index a68eb4aab4f5b..0000000000000 --- a/docs/reference/analysis/tokenfilters/keyword-repeat-tokenfilter.asciidoc +++ /dev/null @@ -1,402 +0,0 @@ -[[analysis-keyword-repeat-tokenfilter]] -=== Keyword repeat token filter -++++ -Keyword repeat -++++ - -Outputs a keyword version of each token in a stream. These keyword tokens are -not stemmed. - -The `keyword_repeat` filter assigns keyword tokens a `keyword` attribute of -`true`. Stemmer token filters, such as -<> or -<>, skip tokens with a `keyword` -attribute of `true`. - -You can use the `keyword_repeat` filter with a stemmer token filter to output a -stemmed and unstemmed version of each token in a stream. - -[IMPORTANT] -==== -To work properly, the `keyword_repeat` filter must be listed before any stemmer -token filters in the <>. - -Stemming does not affect all tokens. This means streams could contain duplicate -tokens in the same position, even after stemming. - -To remove these duplicate tokens, add the -<> filter after the -stemmer filter in the analyzer configuration. -==== - -The `keyword_repeat` filter uses Lucene's -{lucene-analysis-docs}/miscellaneous/KeywordRepeatFilter.html[KeywordRepeatFilter]. - -[[analysis-keyword-repeat-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `keyword_repeat` -filter to output a keyword and non-keyword version of each token in -`fox running and jumping`. 
- -To return the `keyword` attribute for these tokens, the analyze API request also -includes the following arguments: - -* `explain`: `true` -* `attributes`: `keyword` - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "whitespace", - "filter": [ - "keyword_repeat" - ], - "text": "fox running and jumping", - "explain": true, - "attributes": "keyword" -} ----- - -The API returns the following response. Note that one version of each token has -a `keyword` attribute of `true`. - -.**Response** -[%collapsible] -==== -[source,console-result] ----- -{ - "detail": { - "custom_analyzer": true, - "charfilters": [], - "tokenizer": ..., - "tokenfilters": [ - { - "name": "keyword_repeat", - "tokens": [ - { - "token": "fox", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0, - "keyword": true - }, - { - "token": "fox", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0, - "keyword": false - }, - { - "token": "running", - "start_offset": 4, - "end_offset": 11, - "type": "word", - "position": 1, - "keyword": true - }, - { - "token": "running", - "start_offset": 4, - "end_offset": 11, - "type": "word", - "position": 1, - "keyword": false - }, - { - "token": "and", - "start_offset": 12, - "end_offset": 15, - "type": "word", - "position": 2, - "keyword": true - }, - { - "token": "and", - "start_offset": 12, - "end_offset": 15, - "type": "word", - "position": 2, - "keyword": false - }, - { - "token": "jumping", - "start_offset": 16, - "end_offset": 23, - "type": "word", - "position": 3, - "keyword": true - }, - { - "token": "jumping", - "start_offset": 16, - "end_offset": 23, - "type": "word", - "position": 3, - "keyword": false - } - ] - } - ] - } -} ----- -// TESTRESPONSE[s/"tokenizer": \.\.\./"tokenizer": $body.detail.tokenizer/] -==== - -To stem the non-keyword tokens, add the `stemmer` filter after the -`keyword_repeat` filter in the previous analyze API request. - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "whitespace", - "filter": [ - "keyword_repeat", - "stemmer" - ], - "text": "fox running and jumping", - "explain": true, - "attributes": "keyword" -} ----- - -The API returns the following response. Note the following changes: - -* The non-keyword version of `running` was stemmed to `run`. -* The non-keyword version of `jumping` was stemmed to `jump`. - -.**Response** -[%collapsible] -==== -[source,console-result] ----- -{ - "detail": { - "custom_analyzer": true, - "charfilters": [], - "tokenizer": ..., - "tokenfilters": [ - { - "name": "keyword_repeat", - "tokens": ... 
- }, - { - "name": "stemmer", - "tokens": [ - { - "token": "fox", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0, - "keyword": true - }, - { - "token": "fox", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0, - "keyword": false - }, - { - "token": "running", - "start_offset": 4, - "end_offset": 11, - "type": "word", - "position": 1, - "keyword": true - }, - { - "token": "run", - "start_offset": 4, - "end_offset": 11, - "type": "word", - "position": 1, - "keyword": false - }, - { - "token": "and", - "start_offset": 12, - "end_offset": 15, - "type": "word", - "position": 2, - "keyword": true - }, - { - "token": "and", - "start_offset": 12, - "end_offset": 15, - "type": "word", - "position": 2, - "keyword": false - }, - { - "token": "jumping", - "start_offset": 16, - "end_offset": 23, - "type": "word", - "position": 3, - "keyword": true - }, - { - "token": "jump", - "start_offset": 16, - "end_offset": 23, - "type": "word", - "position": 3, - "keyword": false - } - ] - } - ] - } -} ----- -// TESTRESPONSE[s/"tokenizer": \.\.\./"tokenizer": $body.detail.tokenizer/] -// TESTRESPONSE[s/"tokens": .../"tokens": $body.$_path/] -==== - -However, the keyword and non-keyword versions of `fox` and `and` are -identical and in the same respective positions. - -To remove these duplicate tokens, add the `remove_duplicates` filter after -`stemmer` in the analyze API request. - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "whitespace", - "filter": [ - "keyword_repeat", - "stemmer", - "remove_duplicates" - ], - "text": "fox running and jumping", - "explain": true, - "attributes": "keyword" -} ----- - -The API returns the following response. Note that the duplicate tokens for `fox` -and `and` have been removed. - -.**Response** -[%collapsible] -==== -[source,console-result] ----- -{ - "detail": { - "custom_analyzer": true, - "charfilters": [], - "tokenizer": ..., - "tokenfilters": [ - { - "name": "keyword_repeat", - "tokens": ... - }, - { - "name": "stemmer", - "tokens": ... - }, - { - "name": "remove_duplicates", - "tokens": [ - { - "token": "fox", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0, - "keyword": true - }, - { - "token": "running", - "start_offset": 4, - "end_offset": 11, - "type": "word", - "position": 1, - "keyword": true - }, - { - "token": "run", - "start_offset": 4, - "end_offset": 11, - "type": "word", - "position": 1, - "keyword": false - }, - { - "token": "and", - "start_offset": 12, - "end_offset": 15, - "type": "word", - "position": 2, - "keyword": true - }, - { - "token": "jumping", - "start_offset": 16, - "end_offset": 23, - "type": "word", - "position": 3, - "keyword": true - }, - { - "token": "jump", - "start_offset": 16, - "end_offset": 23, - "type": "word", - "position": 3, - "keyword": false - } - ] - } - ] - } -} ----- -// TESTRESPONSE[s/"tokenizer": \.\.\./"tokenizer": $body.detail.tokenizer/] -// TESTRESPONSE[s/"tokens": .../"tokens": $body.$_path/] -==== - -[[analysis-keyword-repeat-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`keyword_repeat` filter to configure a new <>. - -This custom analyzer uses the `keyword_repeat` and `porter_stem` filters to -create a stemmed and unstemmed version of each token in a stream. The -`remove_duplicates` filter then removes any duplicate tokens from the stream. 
- -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_custom_analyzer": { - "tokenizer": "standard", - "filter": [ - "keyword_repeat", - "porter_stem", - "remove_duplicates" - ] - } - } - } - } -} ----- \ No newline at end of file diff --git a/docs/reference/analysis/tokenfilters/kstem-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/kstem-tokenfilter.asciidoc deleted file mode 100644 index 2741a568ab3ec..0000000000000 --- a/docs/reference/analysis/tokenfilters/kstem-tokenfilter.asciidoc +++ /dev/null @@ -1,115 +0,0 @@ -[[analysis-kstem-tokenfilter]] -=== KStem token filter -++++ -KStem -++++ - -Provides https://ciir.cs.umass.edu/pubfiles/ir-35.pdf[KStem]-based stemming for -the English language. The `kstem` filter combines -<> with a built-in -<>. - -The `kstem` filter tends to stem less aggressively than other English stemmer -filters, such as the <> filter. - -The `kstem` filter is equivalent to the -<> filter's -<> variant. - -This filter uses Lucene's -{lucene-analysis-docs}/en/KStemFilter.html[KStemFilter]. - -[[analysis-kstem-tokenfilter-analyze-ex]] -==== Example - -The following analyze API request uses the `kstem` filter to stem `the foxes -jumping quickly` to `the fox jump quick`: - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "standard", - "filter": [ "kstem" ], - "text": "the foxes jumping quickly" -} ----- - -The filter produces the following tokens: - -[source,text] ----- -[ the, fox, jump, quick ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "the", - "start_offset": 0, - "end_offset": 3, - "type": "", - "position": 0 - }, - { - "token": "fox", - "start_offset": 4, - "end_offset": 9, - "type": "", - "position": 1 - }, - { - "token": "jump", - "start_offset": 10, - "end_offset": 17, - "type": "", - "position": 2 - }, - { - "token": "quick", - "start_offset": 18, - "end_offset": 25, - "type": "", - "position": 3 - } - ] -} ----- -//// - -[[analysis-kstem-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`kstem` filter to configure a new <>. - -[IMPORTANT] -==== -To work properly, the `kstem` filter requires lowercase tokens. To ensure tokens -are lowercased, add the <> filter -before the `kstem` filter in the analyzer configuration. -==== - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "whitespace", - "filter": [ - "lowercase", - "kstem" - ] - } - } - } - } -} ----- diff --git a/docs/reference/analysis/tokenfilters/length-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/length-tokenfilter.asciidoc deleted file mode 100644 index 4eced2d39b1dc..0000000000000 --- a/docs/reference/analysis/tokenfilters/length-tokenfilter.asciidoc +++ /dev/null @@ -1,170 +0,0 @@ -[[analysis-length-tokenfilter]] -=== Length token filter -++++ -Length -++++ - -Removes tokens shorter or longer than specified character lengths. -For example, you can use the `length` filter to exclude tokens shorter than 2 -characters and tokens longer than 5 characters. - -This filter uses Lucene's -{lucene-analysis-docs}/miscellaneous/LengthFilter.html[LengthFilter]. - -[TIP] -==== -The `length` filter removes entire tokens. If you'd prefer to shorten tokens to -a specific length, use the <> filter. 
-==== - -[[analysis-length-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `length` -filter to remove tokens longer than 4 characters: - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer": "whitespace", - "filter": [ - { - "type": "length", - "min": 0, - "max": 4 - } - ], - "text": "the quick brown fox jumps over the lazy dog" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ the, fox, over, the, lazy, dog ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens": [ - { - "token": "the", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "fox", - "start_offset": 16, - "end_offset": 19, - "type": "word", - "position": 3 - }, - { - "token": "over", - "start_offset": 26, - "end_offset": 30, - "type": "word", - "position": 5 - }, - { - "token": "the", - "start_offset": 31, - "end_offset": 34, - "type": "word", - "position": 6 - }, - { - "token": "lazy", - "start_offset": 35, - "end_offset": 39, - "type": "word", - "position": 7 - }, - { - "token": "dog", - "start_offset": 40, - "end_offset": 43, - "type": "word", - "position": 8 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-length-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`length` filter to configure a new -<>. - -[source,console] --------------------------------------------------- -PUT length_example -{ - "settings": { - "analysis": { - "analyzer": { - "standard_length": { - "tokenizer": "standard", - "filter": [ "length" ] - } - } - } - } -} --------------------------------------------------- - -[[analysis-length-tokenfilter-configure-parms]] -==== Configurable parameters - -`min`:: -(Optional, integer) -Minimum character length of a token. Shorter tokens are excluded from the -output. Defaults to `0`. - -`max`:: -(Optional, integer) -Maximum character length of a token. Longer tokens are excluded from the output. -Defaults to `Integer.MAX_VALUE`, which is `2^31-1` or `2147483647`. - -[[analysis-length-tokenfilter-customize]] -==== Customize - -To customize the `length` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. 
- -For example, the following request creates a custom `length` filter that removes -tokens shorter than 2 characters and tokens longer than 10 characters: - -[source,console] --------------------------------------------------- -PUT length_custom_example -{ - "settings": { - "analysis": { - "analyzer": { - "whitespace_length_2_to_10_char": { - "tokenizer": "whitespace", - "filter": [ "length_2_to_10_char" ] - } - }, - "filter": { - "length_2_to_10_char": { - "type": "length", - "min": 2, - "max": 10 - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/limit-token-count-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/limit-token-count-tokenfilter.asciidoc deleted file mode 100644 index e06cba7871b93..0000000000000 --- a/docs/reference/analysis/tokenfilters/limit-token-count-tokenfilter.asciidoc +++ /dev/null @@ -1,143 +0,0 @@ -[[analysis-limit-token-count-tokenfilter]] -=== Limit token count token filter -++++ -Limit token count -++++ - -Limits the number of output tokens. The `limit` filter is commonly used to limit -the size of document field values based on token count. - -By default, the `limit` filter keeps only the first token in a stream. For -example, the filter can change the token stream `[ one, two, three ]` to -`[ one ]`. - -This filter uses Lucene's -{lucene-analysis-docs}/miscellaneous/LimitTokenCountFilter.html[LimitTokenCountFilter]. - -[TIP] -==== - If you want to limit the size of field values based on -_character length_, use the <> mapping parameter. -==== - -[[analysis-limit-token-count-tokenfilter-configure-parms]] -==== Configurable parameters - -`max_token_count`:: -(Optional, integer) -Maximum number of tokens to keep. Once this limit is reached, any remaining -tokens are excluded from the output. Defaults to `1`. - -`consume_all_tokens`:: -(Optional, Boolean) -If `true`, the `limit` filter exhausts the token stream, even if the -`max_token_count` has already been reached. Defaults to `false`. - -[[analysis-limit-token-count-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `limit` -filter to keep only the first two tokens in `quick fox jumps over lazy dog`: - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer": "standard", - "filter": [ - { - "type": "limit", - "max_token_count": 2 - } - ], - "text": "quick fox jumps over lazy dog" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ quick, fox ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens": [ - { - "token": "quick", - "start_offset": 0, - "end_offset": 5, - "type": "", - "position": 0 - }, - { - "token": "fox", - "start_offset": 6, - "end_offset": 9, - "type": "", - "position": 1 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-limit-token-count-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`limit` filter to configure a new -<>. 
- -[source,console] --------------------------------------------------- -PUT limit_example -{ - "settings": { - "analysis": { - "analyzer": { - "standard_one_token_limit": { - "tokenizer": "standard", - "filter": [ "limit" ] - } - } - } - } -} --------------------------------------------------- - -[[analysis-limit-token-count-tokenfilter-customize]] -==== Customize - -To customize the `limit` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a custom `limit` filter that keeps -only the first five tokens of a stream: - -[source,console] --------------------------------------------------- -PUT custom_limit_example -{ - "settings": { - "analysis": { - "analyzer": { - "whitespace_five_token_limit": { - "tokenizer": "whitespace", - "filter": [ "five_token_limit" ] - } - }, - "filter": { - "five_token_limit": { - "type": "limit", - "max_token_count": 5 - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc deleted file mode 100644 index 7d6db987ab95a..0000000000000 --- a/docs/reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc +++ /dev/null @@ -1,152 +0,0 @@ -[[analysis-lowercase-tokenfilter]] -=== Lowercase token filter -++++ -Lowercase -++++ - -Changes token text to lowercase. For example, you can use the `lowercase` filter -to change `THE Lazy DoG` to `the lazy dog`. - -In addition to a default filter, the `lowercase` token filter provides access to -Lucene's language-specific lowercase filters for Greek, Irish, and Turkish. - -[[analysis-lowercase-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the default -`lowercase` filter to change the `THE Quick FoX JUMPs` to lowercase: - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer" : "standard", - "filter" : ["lowercase"], - "text" : "THE Quick FoX JUMPs" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ the, quick, fox, jumps ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "the", - "start_offset" : 0, - "end_offset" : 3, - "type" : "", - "position" : 0 - }, - { - "token" : "quick", - "start_offset" : 4, - "end_offset" : 9, - "type" : "", - "position" : 1 - }, - { - "token" : "fox", - "start_offset" : 10, - "end_offset" : 13, - "type" : "", - "position" : 2 - }, - { - "token" : "jumps", - "start_offset" : 14, - "end_offset" : 19, - "type" : "", - "position" : 3 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-lowercase-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`lowercase` filter to configure a new -<>. 
- -[source,console] --------------------------------------------------- -PUT lowercase_example -{ - "settings": { - "analysis": { - "analyzer": { - "whitespace_lowercase": { - "tokenizer": "whitespace", - "filter": [ "lowercase" ] - } - } - } - } -} --------------------------------------------------- - -[[analysis-lowercase-tokenfilter-configure-parms]] -==== Configurable parameters - -`language`:: -+ --- -(Optional, string) -Language-specific lowercase token filter to use. Valid values include: - -`greek`::: Uses Lucene's -{lucene-analysis-docs}/el/GreekLowerCaseFilter.html[GreekLowerCaseFilter] - -`irish`::: Uses Lucene's -{lucene-analysis-docs}/ga/IrishLowerCaseFilter.html[IrishLowerCaseFilter] - -`turkish`::: Uses Lucene's -{lucene-analysis-docs}/tr/TurkishLowerCaseFilter.html[TurkishLowerCaseFilter] - -If not specified, defaults to Lucene's -{lucene-analysis-docs}/core/LowerCaseFilter.html[LowerCaseFilter]. --- - -[[analysis-lowercase-tokenfilter-customize]] -==== Customize - -To customize the `lowercase` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a custom `lowercase` filter for the -Greek language: - -[source,console] --------------------------------------------------- -PUT custom_lowercase_example -{ - "settings": { - "analysis": { - "analyzer": { - "greek_lowercase_example": { - "type": "custom", - "tokenizer": "standard", - "filter": ["greek_lowercase"] - } - }, - "filter": { - "greek_lowercase": { - "type": "lowercase", - "language": "greek" - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/minhash-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/minhash-tokenfilter.asciidoc deleted file mode 100644 index bfbf59908ea47..0000000000000 --- a/docs/reference/analysis/tokenfilters/minhash-tokenfilter.asciidoc +++ /dev/null @@ -1,180 +0,0 @@ -[[analysis-minhash-tokenfilter]] -=== MinHash token filter -++++ -MinHash -++++ - -Uses the {wikipedia}/MinHash[MinHash] technique to produce a -signature for a token stream. You can use MinHash signatures to estimate the -similarity of documents. See <>. - -The `min_hash` filter performs the following operations on a token stream in -order: - -. Hashes each token in the stream. -. Assigns the hashes to buckets, keeping only the smallest hashes of each - bucket. -. Outputs the smallest hash from each bucket as a token stream. - -This filter uses Lucene's -{lucene-analysis-docs}/minhash/MinHashFilter.html[MinHashFilter]. - -[[analysis-minhash-tokenfilter-configure-parms]] -==== Configurable parameters - -`bucket_count`:: -(Optional, integer) -Number of buckets to which hashes are assigned. Defaults to `512`. - -`hash_count`:: -(Optional, integer) -Number of ways to hash each token in the stream. Defaults to `1`. - -`hash_set_size`:: -(Optional, integer) -Number of hashes to keep from each bucket. Defaults to `1`. -+ -Hashes are retained by ascending size, starting with the bucket's smallest hash -first. - -`with_rotation`:: -(Optional, Boolean) -If `true`, the filter fills empty buckets with the value of the first non-empty -bucket to its circular right if the `hash_set_size` is `1`. If the -`bucket_count` argument is greater than `1`, this parameter defaults to `true`. -Otherwise, this parameter defaults to `false`. 
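-
-As an illustrative sketch, the parameters above can be combined in an inline
-filter definition passed to the <> API, as shown below. The output tokens are
-raw MinHash values rather than readable text, so they are not listed here, and
-in practice the input should first be shingled as described in the tips that
-follow.
-
-[source,console]
-----
-GET /_analyze
-{
-  "tokenizer": "standard",
-  "filter": [
-    {
-      "type": "min_hash",
-      "hash_count": 1,
-      "bucket_count": 512,
-      "hash_set_size": 1,
-      "with_rotation": true
-    }
-  ],
-  "text": "the quick brown fox jumps over the lazy dog"
-}
-----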
- -[[analysis-minhash-tokenfilter-configuration-tips]] -==== Tips for configuring the `min_hash` filter - -* `min_hash` filter input tokens should typically be k-words shingles produced -from <>. You should -choose `k` large enough so that the probability of any given shingle -occurring in a document is low. At the same time, as -internally each shingle is hashed into to 128-bit hash, you should choose -`k` small enough so that all possible -different k-words shingles can be hashed to 128-bit hash with -minimal collision. - -* We recommend you test different arguments for the `hash_count`, `bucket_count` and - `hash_set_size` parameters: - -** To improve precision, increase the `bucket_count` or - `hash_set_size` arguments. Higher `bucket_count` and `hash_set_size` values - increase the likelihood that different tokens are indexed to different - buckets. - -** To improve the recall, increase the value of the `hash_count` argument. For - example, setting `hash_count` to `2` hashes each token in two different ways, - increasing the number of potential candidates for search. - -* By default, the `min_hash` filter produces 512 tokens for each document. Each -token is 16 bytes in size. This means each document's size will be increased by -around 8Kb. - -* The `min_hash` filter is used for Jaccard similarity. This means -that it doesn't matter how many times a document contains a certain token, -only that if it contains it or not. - -[[analysis-minhash-tokenfilter-similarity-search]] -==== Using the `min_hash` token filter for similarity search - -The `min_hash` token filter allows you to hash documents for similarity search. -Similarity search, or nearest neighbor search is a complex problem. -A naive solution requires an exhaustive pairwise comparison between a query -document and every document in an index. This is a prohibitive operation -if the index is large. A number of approximate nearest neighbor search -solutions have been developed to make similarity search more practical and -computationally feasible. One of these solutions involves hashing of documents. - -Documents are hashed in a way that similar documents are more likely -to produce the same hash code and are put into the same hash bucket, -while dissimilar documents are more likely to be hashed into -different hash buckets. This type of hashing is known as -locality sensitive hashing (LSH). - -Depending on what constitutes the similarity between documents, -various LSH functions https://arxiv.org/abs/1408.2927[have been proposed]. -For {wikipedia}/Jaccard_index[Jaccard similarity], a popular -LSH function is {wikipedia}/MinHash[MinHash]. -A general idea of the way MinHash produces a signature for a document -is by applying a random permutation over the whole index vocabulary (random -numbering for the vocabulary), and recording the minimum value for this permutation -for the document (the minimum number for a vocabulary word that is present -in the document). The permutations are run several times; -combining the minimum values for all of them will constitute a -signature for the document. - -In practice, instead of random permutations, a number of hash functions -are chosen. A hash function calculates a hash code for each of a -document's tokens and chooses the minimum hash code among them. -The minimum hash codes from all hash functions are combined -to form a signature for the document. 
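To make the preceding description concrete, the following sketch shows one way such a similarity search might be issued once a text field is analyzed with a `min_hash`-based analyzer (the `my-index-000001` index and its `fingerprint` field are the ones configured in the next section). The query style and the `minimum_should_match` value are illustrative assumptions rather than part of the original documentation: requiring a high share of the query's MinHash tokens to match approximates requiring a high Jaccard similarity between the query text and the stored documents.

[source,console]
----
GET /my-index-000001/_search
{
  "query": {
    "match": {
      "fingerprint": {
        "query": "the full text of the document to compare against the index",
        "minimum_should_match": "90%"
      }
    }
  }
}
----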
- -[[analysis-minhash-tokenfilter-customize]] -==== Customize and add to an analyzer - -To customize the `min_hash` filter, duplicate it to create the basis for a new -custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following <> request -uses the following custom token filters to configure a new -<>: - -* `my_shingle_filter`, a custom <>. `my_shingle_filter` only outputs five-word shingles. -* `my_minhash_filter`, a custom `min_hash` filter. `my_minhash_filter` hashes - each five-word shingle once. It then assigns the hashes into 512 buckets, - keeping only the smallest hash from each bucket. - -The request also assigns the custom analyzer to the `fingerprint` field mapping. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "filter": { - "my_shingle_filter": { <1> - "type": "shingle", - "min_shingle_size": 5, - "max_shingle_size": 5, - "output_unigrams": false - }, - "my_minhash_filter": { - "type": "min_hash", - "hash_count": 1, <2> - "bucket_count": 512, <3> - "hash_set_size": 1, <4> - "with_rotation": true <5> - } - }, - "analyzer": { - "my_analyzer": { - "tokenizer": "standard", - "filter": [ - "my_shingle_filter", - "my_minhash_filter" - ] - } - } - } - }, - "mappings": { - "properties": { - "fingerprint": { - "type": "text", - "analyzer": "my_analyzer" - } - } - } -} ----- - -<1> Configures a custom shingle filter to output only five-word shingles. -<2> Each five-word shingle in the stream is hashed once. -<3> The hashes are assigned to 512 buckets. -<4> Only the smallest hash in each bucket is retained. -<5> The filter fills empty buckets with the values of neighboring buckets. diff --git a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc deleted file mode 100644 index 7c0014e17010f..0000000000000 --- a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc +++ /dev/null @@ -1,123 +0,0 @@ -[[analysis-multiplexer-tokenfilter]] -=== Multiplexer token filter -++++ -Multiplexer -++++ - -A token filter of type `multiplexer` will emit multiple tokens at the same position, -each version of the token having been run through a different filter. Identical -output tokens at the same position will be removed. - -WARNING: If the incoming token stream has duplicate tokens, then these will also be -removed by the multiplexer - -[discrete] -=== Options -[horizontal] -filters:: a list of token filters to apply to incoming tokens. These can be any - token filters defined elsewhere in the index mappings. Filters can be chained - using a comma-delimited string, so for example `"lowercase, porter_stem"` would - apply the `lowercase` filter and then the `porter_stem` filter to a single token. 
- -WARNING: <> or multi-word synonym token filters will not function normally - when they are declared in the filters array because they read ahead internally - which is unsupported by the multiplexer - -preserve_original:: if `true` (the default) then emit the original token in - addition to the filtered tokens - - -[discrete] -=== Settings example - -You can set it up like: - -[source,console] --------------------------------------------------- -PUT /multiplexer_example -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "standard", - "filter": [ "my_multiplexer" ] - } - }, - "filter": { - "my_multiplexer": { - "type": "multiplexer", - "filters": [ "lowercase", "lowercase, porter_stem" ] - } - } - } - } -} --------------------------------------------------- - -And test it like: - -[source,console] --------------------------------------------------- -POST /multiplexer_example/_analyze -{ - "analyzer" : "my_analyzer", - "text" : "Going HOME" -} --------------------------------------------------- -// TEST[continued] - -And it'd respond: - -[source,console-result] --------------------------------------------------- -{ - "tokens": [ - { - "token": "Going", - "start_offset": 0, - "end_offset": 5, - "type": "", - "position": 0 - }, - { - "token": "going", - "start_offset": 0, - "end_offset": 5, - "type": "", - "position": 0 - }, - { - "token": "go", - "start_offset": 0, - "end_offset": 5, - "type": "", - "position": 0 - }, - { - "token": "HOME", - "start_offset": 6, - "end_offset": 10, - "type": "", - "position": 1 - }, - { - "token": "home", <1> - "start_offset": 6, - "end_offset": 10, - "type": "", - "position": 1 - } - ] -} --------------------------------------------------- - -<1> The stemmer has also emitted a token `home` at position 1, but because it is a -duplicate of this token it has been removed from the token stream - -NOTE: The synonym and synonym_graph filters use their preceding analysis chain to -parse and analyse their synonym lists, and will throw an exception if that chain -contains token filters that produce multiple tokens at the same position. -If you want to apply synonyms to a token stream containing a multiplexer, then you -should append the synonym filter to each relevant multiplexer filter list, rather than -placing it after the multiplexer in the main token chain definition. diff --git a/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc deleted file mode 100644 index fc6aea3d06964..0000000000000 --- a/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc +++ /dev/null @@ -1,232 +0,0 @@ -[[analysis-ngram-tokenfilter]] -=== N-gram token filter -++++ -N-gram -++++ - -Forms {wikipedia}/N-gram[n-grams] of specified lengths from -a token. - -For example, you can use the `ngram` token filter to change `fox` to -`[ f, fo, o, ox, x ]`. - -This filter uses Lucene's -{lucene-analysis-docs}/ngram/NGramTokenFilter.html[NGramTokenFilter]. - -[NOTE] -==== -The `ngram` filter is similar to the -<>. However, the -`edge_ngram` only outputs n-grams that start at the beginning of a token. 
-==== - -[[analysis-ngram-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `ngram` -filter to convert `Quick fox` to 1-character and 2-character n-grams: - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer": "standard", - "filter": [ "ngram" ], - "text": "Quick fox" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ Q, Qu, u, ui, i, ic, c, ck, k, f, fo, o, ox, x ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "Q", - "start_offset" : 0, - "end_offset" : 5, - "type" : "", - "position" : 0 - }, - { - "token" : "Qu", - "start_offset" : 0, - "end_offset" : 5, - "type" : "", - "position" : 0 - }, - { - "token" : "u", - "start_offset" : 0, - "end_offset" : 5, - "type" : "", - "position" : 0 - }, - { - "token" : "ui", - "start_offset" : 0, - "end_offset" : 5, - "type" : "", - "position" : 0 - }, - { - "token" : "i", - "start_offset" : 0, - "end_offset" : 5, - "type" : "", - "position" : 0 - }, - { - "token" : "ic", - "start_offset" : 0, - "end_offset" : 5, - "type" : "", - "position" : 0 - }, - { - "token" : "c", - "start_offset" : 0, - "end_offset" : 5, - "type" : "", - "position" : 0 - }, - { - "token" : "ck", - "start_offset" : 0, - "end_offset" : 5, - "type" : "", - "position" : 0 - }, - { - "token" : "k", - "start_offset" : 0, - "end_offset" : 5, - "type" : "", - "position" : 0 - }, - { - "token" : "f", - "start_offset" : 6, - "end_offset" : 9, - "type" : "", - "position" : 1 - }, - { - "token" : "fo", - "start_offset" : 6, - "end_offset" : 9, - "type" : "", - "position" : 1 - }, - { - "token" : "o", - "start_offset" : 6, - "end_offset" : 9, - "type" : "", - "position" : 1 - }, - { - "token" : "ox", - "start_offset" : 6, - "end_offset" : 9, - "type" : "", - "position" : 1 - }, - { - "token" : "x", - "start_offset" : 6, - "end_offset" : 9, - "type" : "", - "position" : 1 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-ngram-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the `ngram` -filter to configure a new <>. - -[source,console] --------------------------------------------------- -PUT ngram_example -{ - "settings": { - "analysis": { - "analyzer": { - "standard_ngram": { - "tokenizer": "standard", - "filter": [ "ngram" ] - } - } - } - } -} --------------------------------------------------- - -[[analysis-ngram-tokenfilter-configure-parms]] -==== Configurable parameters - -`max_gram`:: -(Optional, integer) -Maximum length of characters in a gram. Defaults to `2`. - -`min_gram`:: -(Optional, integer) -Minimum length of characters in a gram. Defaults to `1`. - -`preserve_original`:: -(Optional, Boolean) -Emits original token when set to `true`. Defaults to `false`. - -You can use the <> index-level -setting to control the maximum allowed difference between the `max_gram` and -`min_gram` values. - -[[analysis-ngram-tokenfilter-customize]] -==== Customize - -To customize the `ngram` filter, duplicate it to create the basis for a new -custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a custom `ngram` filter that forms -n-grams between 3-5 characters. 
The request also increases the -`index.max_ngram_diff` setting to `2`. - -[source,console] --------------------------------------------------- -PUT ngram_custom_example -{ - "settings": { - "index": { - "max_ngram_diff": 2 - }, - "analysis": { - "analyzer": { - "default": { - "tokenizer": "whitespace", - "filter": [ "3_5_grams" ] - } - }, - "filter": { - "3_5_grams": { - "type": "ngram", - "min_gram": 3, - "max_gram": 5 - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/normalization-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/normalization-tokenfilter.asciidoc deleted file mode 100644 index b47420baf9d50..0000000000000 --- a/docs/reference/analysis/tokenfilters/normalization-tokenfilter.asciidoc +++ /dev/null @@ -1,43 +0,0 @@ -[[analysis-normalization-tokenfilter]] -=== Normalization token filters -++++ -Normalization -++++ - -There are several token filters available which try to normalize special -characters of a certain language. - -[horizontal] -Arabic:: - -{lucene-analysis-docs}/ar/ArabicNormalizer.html[`arabic_normalization`] - -German:: - -{lucene-analysis-docs}/de/GermanNormalizationFilter.html[`german_normalization`] - -Hindi:: - -{lucene-analysis-docs}/hi/HindiNormalizer.html[`hindi_normalization`] - -Indic:: - -{lucene-analysis-docs}/in/IndicNormalizer.html[`indic_normalization`] - -Kurdish (Sorani):: - -{lucene-analysis-docs}/ckb/SoraniNormalizer.html[`sorani_normalization`] - -Persian:: - -{lucene-analysis-docs}/fa/PersianNormalizer.html[`persian_normalization`] - -Scandinavian:: - -{lucene-analysis-docs}/miscellaneous/ScandinavianNormalizationFilter.html[`scandinavian_normalization`], -{lucene-analysis-docs}/miscellaneous/ScandinavianFoldingFilter.html[`scandinavian_folding`] - -Serbian:: - -{lucene-analysis-docs}/sr/SerbianNormalizationFilter.html[`serbian_normalization`] - diff --git a/docs/reference/analysis/tokenfilters/pattern-capture-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/pattern-capture-tokenfilter.asciidoc deleted file mode 100644 index b57c31a64e3b8..0000000000000 --- a/docs/reference/analysis/tokenfilters/pattern-capture-tokenfilter.asciidoc +++ /dev/null @@ -1,152 +0,0 @@ -[[analysis-pattern-capture-tokenfilter]] -=== Pattern capture token filter -++++ -Pattern capture -++++ - -The `pattern_capture` token filter, unlike the `pattern` tokenizer, -emits a token for every capture group in the regular expression. -Patterns are not anchored to the beginning and end of the string, so -each pattern can match multiple times, and matches are allowed to -overlap. - -[WARNING] -.Beware of Pathological Regular Expressions -======================================== - -The pattern capture token filter uses -https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html[Java Regular Expressions]. - -A badly written regular expression could run very slowly or even throw a -StackOverflowError and cause the node it is running on to exit suddenly. - -Read more about https://www.regular-expressions.info/catastrophic.html[pathological regular expressions and how to avoid them]. 
- -======================================== - -For instance a pattern like : - -[source,text] --------------------------------------------------- -"(([a-z]+)(\d*))" --------------------------------------------------- - -when matched against: - -[source,text] --------------------------------------------------- -"abc123def456" --------------------------------------------------- - -would produce the tokens: [ `abc123`, `abc`, `123`, `def456`, `def`, -`456` ] - -If `preserve_original` is set to `true` (the default) then it would also -emit the original token: `abc123def456`. - -This is particularly useful for indexing text like camel-case code, eg -`stripHTML` where a user may search for `"strip html"` or `"striphtml"`: - -[source,console] --------------------------------------------------- -PUT test -{ - "settings" : { - "analysis" : { - "filter" : { - "code" : { - "type" : "pattern_capture", - "preserve_original" : true, - "patterns" : [ - "(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)", - "(\\d+)" - ] - } - }, - "analyzer" : { - "code" : { - "tokenizer" : "pattern", - "filter" : [ "code", "lowercase" ] - } - } - } - } -} --------------------------------------------------- - -When used to analyze the text - -[source,java] --------------------------------------------------- -import static org.apache.commons.lang.StringEscapeUtils.escapeHtml --------------------------------------------------- - -this emits the tokens: [ `import`, `static`, `org`, `apache`, `commons`, -`lang`, `stringescapeutils`, `string`, `escape`, `utils`, `escapehtml`, -`escape`, `html` ] - -Another example is analyzing email addresses: - -[source,console] --------------------------------------------------- -PUT test -{ - "settings" : { - "analysis" : { - "filter" : { - "email" : { - "type" : "pattern_capture", - "preserve_original" : true, - "patterns" : [ - "([^@]+)", - "(\\p{L}+)", - "(\\d+)", - "@(.+)" - ] - } - }, - "analyzer" : { - "email" : { - "tokenizer" : "uax_url_email", - "filter" : [ "email", "lowercase", "unique" ] - } - } - } - } -} --------------------------------------------------- - -When the above analyzer is used on an email address like: - -[source,text] --------------------------------------------------- -john-smith_123@foo-bar.com --------------------------------------------------- - -it would produce the following tokens: - - john-smith_123@foo-bar.com, john-smith_123, - john, smith, 123, foo-bar.com, foo, bar, com - -Multiple patterns are required to allow overlapping captures, but also -means that patterns are less dense and easier to understand. - -*Note:* All tokens are emitted in the same position, and with the same -character offsets. This means, for example, that a `match` query for -`john-smith_123@foo-bar.com` that uses this analyzer will return documents -containing any of these tokens, even when using the `and` operator. -Also, when combined with highlighting, the whole original token will -be highlighted, not just the matching subset. 
For instance, querying -the above email address for `"smith"` would highlight: - -[source,html] --------------------------------------------------- - john-smith_123@foo-bar.com --------------------------------------------------- - -not: - -[source,html] --------------------------------------------------- - john-smith_123@foo-bar.com --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/pattern_replace-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/pattern_replace-tokenfilter.asciidoc deleted file mode 100644 index 37b96f23c8a39..0000000000000 --- a/docs/reference/analysis/tokenfilters/pattern_replace-tokenfilter.asciidoc +++ /dev/null @@ -1,157 +0,0 @@ -[[analysis-pattern_replace-tokenfilter]] -=== Pattern replace token filter -++++ -Pattern replace -++++ - -Uses a regular expression to match and replace token substrings. - -The `pattern_replace` filter uses -https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html[Java's -regular expression syntax]. By default, the filter replaces matching substrings -with an empty substring (`""`). Replacement substrings can use Java's -https://docs.oracle.com/javase/8/docs/api/java/util/regex/Matcher.html#appendReplacement-java.lang.StringBuffer-java.lang.String-[`$g` -syntax] to reference capture groups from the original token text. - -[WARNING] -==== -A poorly-written regular expression may run slowly or return a -StackOverflowError, causing the node running the expression to exit suddenly. - -Read more about -https://www.regular-expressions.info/catastrophic.html[pathological regular -expressions and how to avoid them]. -==== - -This filter uses Lucene's -{lucene-analysis-docs}/pattern/PatternReplaceFilter.html[PatternReplaceFilter]. - -[[analysis-pattern-replace-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `pattern_replace` -filter to prepend `watch` to the substring `dog` in `foxes jump lazy dogs`. - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "whitespace", - "filter": [ - { - "type": "pattern_replace", - "pattern": "(dog)", - "replacement": "watch$1" - } - ], - "text": "foxes jump lazy dogs" -} ----- - -The filter produces the following tokens. - -[source,text] ----- -[ foxes, jump, lazy, watchdogs ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "foxes", - "start_offset": 0, - "end_offset": 5, - "type": "word", - "position": 0 - }, - { - "token": "jump", - "start_offset": 6, - "end_offset": 10, - "type": "word", - "position": 1 - }, - { - "token": "lazy", - "start_offset": 11, - "end_offset": 15, - "type": "word", - "position": 2 - }, - { - "token": "watchdogs", - "start_offset": 16, - "end_offset": 20, - "type": "word", - "position": 3 - } - ] -} ----- -//// - -[[analysis-pattern-replace-tokenfilter-configure-parms]] -==== Configurable parameters - -`all`:: -(Optional, Boolean) -If `true`, all substrings matching the `pattern` parameter's regular expression -are replaced. If `false`, the filter replaces only the first matching substring -in each token. Defaults to `true`. - -`pattern`:: -(Required, string) -Regular expression, written in -https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html[Java's -regular expression syntax]. The filter replaces token substrings matching this -pattern with the substring in the `replacement` parameter. - -`replacement`:: -(Optional, string) -Replacement substring. Defaults to an empty substring (`""`). 
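As an illustration of the `all` parameter described above, the following `_analyze` request is a sketch (the single-character pattern and the sample text are assumptions, not part of the original examples). Because `all` is `false`, only the first `o` in each token is replaced:

[source,console]
----
GET /_analyze
{
  "tokenizer": "whitespace",
  "filter": [
    {
      "type": "pattern_replace",
      "pattern": "o",
      "replacement": "0",
      "all": false
    }
  ],
  "text": "foo boo"
}
----

The expected tokens are:

[source,text]
----
[ f0o, b0o ]
----

With `all` left at its default of `true`, every `o` would be replaced instead, producing `f00` and `b00`.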
- -[[analysis-pattern-replace-tokenfilter-customize]] -==== Customize and add to an analyzer - -To customize the `pattern_replace` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -The following <> request -configures a new <> using a custom -`pattern_replace` filter, `my_pattern_replace_filter`. - -The `my_pattern_replace_filter` filter uses the regular expression `[£|€]` to -match and remove the currency symbols `£` and `€`. The filter's `all` -parameter is `false`, meaning only the first matching symbol in each token is -removed. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "keyword", - "filter": [ - "my_pattern_replace_filter" - ] - } - }, - "filter": { - "my_pattern_replace_filter": { - "type": "pattern_replace", - "pattern": "[£|€]", - "replacement": "", - "all": false - } - } - } - } -} ----- diff --git a/docs/reference/analysis/tokenfilters/phonetic-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/phonetic-tokenfilter.asciidoc deleted file mode 100644 index cceac39e691ca..0000000000000 --- a/docs/reference/analysis/tokenfilters/phonetic-tokenfilter.asciidoc +++ /dev/null @@ -1,7 +0,0 @@ -[[analysis-phonetic-tokenfilter]] -=== Phonetic token filter -++++ -Phonetic -++++ - -The `phonetic` token filter is provided as the {plugins}/analysis-phonetic.html[`analysis-phonetic`] plugin. diff --git a/docs/reference/analysis/tokenfilters/porterstem-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/porterstem-tokenfilter.asciidoc deleted file mode 100644 index 6c228ceb0457a..0000000000000 --- a/docs/reference/analysis/tokenfilters/porterstem-tokenfilter.asciidoc +++ /dev/null @@ -1,114 +0,0 @@ -[[analysis-porterstem-tokenfilter]] -=== Porter stem token filter -++++ -Porter stem -++++ - -Provides <> for the English language, -based on the https://snowballstem.org/algorithms/porter/stemmer.html[Porter -stemming algorithm]. - -This filter tends to stem more aggressively than other English -stemmer filters, such as the <> filter. - -The `porter_stem` filter is equivalent to the -<> filter's -<> variant. - -The `porter_stem` filter uses Lucene's -{lucene-analysis-docs}/en/PorterStemFilter.html[PorterStemFilter]. - -[[analysis-porterstem-tokenfilter-analyze-ex]] -==== Example - -The following analyze API request uses the `porter_stem` filter to stem -`the foxes jumping quickly` to `the fox jump quickli`: - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "standard", - "filter": [ "porter_stem" ], - "text": "the foxes jumping quickly" -} ----- - -The filter produces the following tokens: - -[source,text] ----- -[ the, fox, jump, quickli ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "the", - "start_offset": 0, - "end_offset": 3, - "type": "", - "position": 0 - }, - { - "token": "fox", - "start_offset": 4, - "end_offset": 9, - "type": "", - "position": 1 - }, - { - "token": "jump", - "start_offset": 10, - "end_offset": 17, - "type": "", - "position": 2 - }, - { - "token": "quickli", - "start_offset": 18, - "end_offset": 25, - "type": "", - "position": 3 - } - ] -} ----- -//// - -[[analysis-porterstem-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`porter_stem` filter to configure a new <>. - -[IMPORTANT] -==== -To work properly, the `porter_stem` filter requires lowercase tokens. 
To ensure -tokens are lowercased, add the <> -filter before the `porter_stem` filter in the analyzer configuration. -==== - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "whitespace", - "filter": [ - "lowercase", - "porter_stem" - ] - } - } - } - } -} ----- diff --git a/docs/reference/analysis/tokenfilters/predicate-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/predicate-tokenfilter.asciidoc deleted file mode 100644 index 9a20131a227e7..0000000000000 --- a/docs/reference/analysis/tokenfilters/predicate-tokenfilter.asciidoc +++ /dev/null @@ -1,128 +0,0 @@ -[[analysis-predicatefilter-tokenfilter]] -=== Predicate script token filter -++++ -Predicate script -++++ - -Removes tokens that don't match a provided predicate script. The filter supports -inline {painless}/index.html[Painless] scripts only. Scripts are evaluated in -the {painless}/painless-analysis-predicate-context.html[analysis predicate -context]. - -[[analysis-predicatefilter-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the -`predicate_token_filter` filter to only output tokens longer than three -characters from `the fox jumps the lazy dog`. - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "whitespace", - "filter": [ - { - "type": "predicate_token_filter", - "script": { - "source": """ - token.term.length() > 3 - """ - } - } - ], - "text": "the fox jumps the lazy dog" -} ----- - -The filter produces the following tokens. - -[source,text] ----- -[ jumps, lazy ] ----- - -The API response contains the position and offsets of each output token. Note -the `predicate_token_filter` filter does not change the tokens' original -positions or offsets. - -.*Response* -[%collapsible] -==== -[source,console-result] ----- -{ - "tokens" : [ - { - "token" : "jumps", - "start_offset" : 8, - "end_offset" : 13, - "type" : "word", - "position" : 2 - }, - { - "token" : "lazy", - "start_offset" : 18, - "end_offset" : 22, - "type" : "word", - "position" : 4 - } - ] -} ----- -==== - -[[analysis-predicatefilter-tokenfilter-configure-parms]] -==== Configurable parameters - -`script`:: -(Required, <>) -Script containing a condition used to filter incoming tokens. Only tokens that -match this script are included in the output. -+ -This parameter supports inline {painless}/index.html[Painless] scripts only. The -script is evaluated in the -{painless}/painless-analysis-predicate-context.html[analysis predicate context]. - -[[analysis-predicatefilter-tokenfilter-customize]] -==== Customize and add to an analyzer - -To customize the `predicate_token_filter` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -The following <> request -configures a new <> using a custom -`predicate_token_filter` filter, `my_script_filter`. - -The `my_script_filter` filter removes tokens with of any type other than -`ALPHANUM`. 
- -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "standard", - "filter": [ - "my_script_filter" - ] - } - }, - "filter": { - "my_script_filter": { - "type": "predicate_token_filter", - "script": { - "source": """ - token.type.contains("ALPHANUM") - """ - } - } - } - } - } -} ----- diff --git a/docs/reference/analysis/tokenfilters/remove-duplicates-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/remove-duplicates-tokenfilter.asciidoc deleted file mode 100644 index 52f2146f1daf6..0000000000000 --- a/docs/reference/analysis/tokenfilters/remove-duplicates-tokenfilter.asciidoc +++ /dev/null @@ -1,154 +0,0 @@ -[[analysis-remove-duplicates-tokenfilter]] -=== Remove duplicates token filter -++++ -Remove duplicates -++++ - -Removes duplicate tokens in the same position. - -The `remove_duplicates` filter uses Lucene's -{lucene-analysis-docs}/miscellaneous/RemoveDuplicatesTokenFilter.html[RemoveDuplicatesTokenFilter]. - -[[analysis-remove-duplicates-tokenfilter-analyze-ex]] -==== Example - -To see how the `remove_duplicates` filter works, you first need to produce a -token stream containing duplicate tokens in the same position. - -The following <> request uses the -<> and -<> filters to create stemmed and -unstemmed tokens for `jumping dog`. - -[source,console] ----- -GET _analyze -{ - "tokenizer": "whitespace", - "filter": [ - "keyword_repeat", - "stemmer" - ], - "text": "jumping dog" -} ----- - -The API returns the following response. Note that the `dog` token in position -`1` is duplicated. - -[source,console-result] ----- -{ - "tokens": [ - { - "token": "jumping", - "start_offset": 0, - "end_offset": 7, - "type": "word", - "position": 0 - }, - { - "token": "jump", - "start_offset": 0, - "end_offset": 7, - "type": "word", - "position": 0 - }, - { - "token": "dog", - "start_offset": 8, - "end_offset": 11, - "type": "word", - "position": 1 - }, - { - "token": "dog", - "start_offset": 8, - "end_offset": 11, - "type": "word", - "position": 1 - } - ] -} ----- - -To remove one of the duplicate `dog` tokens, add the `remove_duplicates` filter -to the previous analyze API request. - -[source,console] ----- -GET _analyze -{ - "tokenizer": "whitespace", - "filter": [ - "keyword_repeat", - "stemmer", - "remove_duplicates" - ], - "text": "jumping dog" -} ----- - -The API returns the following response. There is now only one `dog` token in -position `1`. - -[source,console-result] ----- -{ - "tokens": [ - { - "token": "jumping", - "start_offset": 0, - "end_offset": 7, - "type": "word", - "position": 0 - }, - { - "token": "jump", - "start_offset": 0, - "end_offset": 7, - "type": "word", - "position": 0 - }, - { - "token": "dog", - "start_offset": 8, - "end_offset": 11, - "type": "word", - "position": 1 - } - ] -} ----- - -[[analysis-remove-duplicates-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`remove_duplicates` filter to configure a new <>. - -This custom analyzer uses the `keyword_repeat` and `stemmer` filters to create a -stemmed and unstemmed version of each token in a stream. The `remove_duplicates` -filter then removes any duplicate tokens in the same position. 
- -[source,console] ----- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_custom_analyzer": { - "tokenizer": "standard", - "filter": [ - "keyword_repeat", - "stemmer", - "remove_duplicates" - ] - } - } - } - } -} ----- \ No newline at end of file diff --git a/docs/reference/analysis/tokenfilters/reverse-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/reverse-tokenfilter.asciidoc deleted file mode 100644 index d66e143b4c4de..0000000000000 --- a/docs/reference/analysis/tokenfilters/reverse-tokenfilter.asciidoc +++ /dev/null @@ -1,93 +0,0 @@ -[[analysis-reverse-tokenfilter]] -=== Reverse token filter -++++ -Reverse -++++ - -Reverses each token in a stream. For example, you can use the `reverse` filter -to change `cat` to `tac`. - -Reversed tokens are useful for suffix-based searches, -such as finding words that end in `-ion` or searching file names by their -extension. - -This filter uses Lucene's -{lucene-analysis-docs}/reverse/ReverseStringFilter.html[ReverseStringFilter]. - -[[analysis-reverse-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `reverse` -filter to reverse each token in `quick fox jumps`: - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer" : "standard", - "filter" : ["reverse"], - "text" : "quick fox jumps" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ kciuq, xof, spmuj ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "kciuq", - "start_offset" : 0, - "end_offset" : 5, - "type" : "", - "position" : 0 - }, - { - "token" : "xof", - "start_offset" : 6, - "end_offset" : 9, - "type" : "", - "position" : 1 - }, - { - "token" : "spmuj", - "start_offset" : 10, - "end_offset" : 15, - "type" : "", - "position" : 2 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-reverse-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`reverse` filter to configure a new -<>. - -[source,console] --------------------------------------------------- -PUT reverse_example -{ - "settings" : { - "analysis" : { - "analyzer" : { - "whitespace_reverse" : { - "tokenizer" : "whitespace", - "filter" : ["reverse"] - } - } - } - } -} --------------------------------------------------- \ No newline at end of file diff --git a/docs/reference/analysis/tokenfilters/shingle-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/shingle-tokenfilter.asciidoc deleted file mode 100644 index 0598fb329717d..0000000000000 --- a/docs/reference/analysis/tokenfilters/shingle-tokenfilter.asciidoc +++ /dev/null @@ -1,510 +0,0 @@ -[[analysis-shingle-tokenfilter]] -=== Shingle token filter -++++ -Shingle -++++ - -Add shingles, or word {wikipedia}/N-gram[n-grams], to a token -stream by concatenating adjacent tokens. By default, the `shingle` token filter -outputs two-word shingles and unigrams. - -For example, many tokenizers convert `the lazy dog` to `[ the, lazy, dog ]`. You -can use the `shingle` filter to add two-word shingles to this stream: -`[ the, the lazy, lazy, lazy dog, dog ]`. - -TIP: Shingles are often used to help speed up phrase queries, such as -<>. 
Rather than creating shingles -using the `shingles` filter, we recommend you use the -<> mapping parameter on the appropriate -<> field instead. - -This filter uses Lucene's -{lucene-analysis-docs}/shingle/ShingleFilter.html[ShingleFilter]. - -[[analysis-shingle-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `shingle` -filter to add two-word shingles to the token stream for `quick brown fox jumps`: - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "whitespace", - "filter": [ "shingle" ], - "text": "quick brown fox jumps" -} ----- - -The filter produces the following tokens: - -[source,text] ----- -[ quick, quick brown, brown, brown fox, fox, fox jumps, jumps ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "quick", - "start_offset": 0, - "end_offset": 5, - "type": "word", - "position": 0 - }, - { - "token": "quick brown", - "start_offset": 0, - "end_offset": 11, - "type": "shingle", - "position": 0, - "positionLength": 2 - }, - { - "token": "brown", - "start_offset": 6, - "end_offset": 11, - "type": "word", - "position": 1 - }, - { - "token": "brown fox", - "start_offset": 6, - "end_offset": 15, - "type": "shingle", - "position": 1, - "positionLength": 2 - }, - { - "token": "fox", - "start_offset": 12, - "end_offset": 15, - "type": "word", - "position": 2 - }, - { - "token": "fox jumps", - "start_offset": 12, - "end_offset": 21, - "type": "shingle", - "position": 2, - "positionLength": 2 - }, - { - "token": "jumps", - "start_offset": 16, - "end_offset": 21, - "type": "word", - "position": 3 - } - ] -} ----- -//// - -To produce shingles of 2-3 words, add the following arguments to the analyze API -request: - -* `min_shingle_size`: `2` -* `max_shingle_size`: `3` - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "whitespace", - "filter": [ - { - "type": "shingle", - "min_shingle_size": 2, - "max_shingle_size": 3 - } - ], - "text": "quick brown fox jumps" -} ----- - -The filter produces the following tokens: - -[source,text] ----- -[ quick, quick brown, quick brown fox, brown, brown fox, brown fox jumps, fox, fox jumps, jumps ] ----- - -//// -[source, console-result] ----- -{ - "tokens": [ - { - "token": "quick", - "start_offset": 0, - "end_offset": 5, - "type": "word", - "position": 0 - }, - { - "token": "quick brown", - "start_offset": 0, - "end_offset": 11, - "type": "shingle", - "position": 0, - "positionLength": 2 - }, - { - "token": "quick brown fox", - "start_offset": 0, - "end_offset": 15, - "type": "shingle", - "position": 0, - "positionLength": 3 - }, - { - "token": "brown", - "start_offset": 6, - "end_offset": 11, - "type": "word", - "position": 1 - }, - { - "token": "brown fox", - "start_offset": 6, - "end_offset": 15, - "type": "shingle", - "position": 1, - "positionLength": 2 - }, - { - "token": "brown fox jumps", - "start_offset": 6, - "end_offset": 21, - "type": "shingle", - "position": 1, - "positionLength": 3 - }, - { - "token": "fox", - "start_offset": 12, - "end_offset": 15, - "type": "word", - "position": 2 - }, - { - "token": "fox jumps", - "start_offset": 12, - "end_offset": 21, - "type": "shingle", - "position": 2, - "positionLength": 2 - }, - { - "token": "jumps", - "start_offset": 16, - "end_offset": 21, - "type": "word", - "position": 3 - } - ] -} ----- -//// - -To only include shingles in the output, add an `output_unigrams` argument of -`false` to the request. 
- -[source,console] ----- -GET /_analyze -{ - "tokenizer": "whitespace", - "filter": [ - { - "type": "shingle", - "min_shingle_size": 2, - "max_shingle_size": 3, - "output_unigrams": false - } - ], - "text": "quick brown fox jumps" -} ----- - -The filter produces the following tokens: - -[source,text] ----- -[ quick brown, quick brown fox, brown fox, brown fox jumps, fox jumps ] ----- - -//// -[source, console-result] ----- -{ - "tokens": [ - { - "token": "quick brown", - "start_offset": 0, - "end_offset": 11, - "type": "shingle", - "position": 0 - }, - { - "token": "quick brown fox", - "start_offset": 0, - "end_offset": 15, - "type": "shingle", - "position": 0, - "positionLength": 2 - }, - { - "token": "brown fox", - "start_offset": 6, - "end_offset": 15, - "type": "shingle", - "position": 1 - }, - { - "token": "brown fox jumps", - "start_offset": 6, - "end_offset": 21, - "type": "shingle", - "position": 1, - "positionLength": 2 - }, - { - "token": "fox jumps", - "start_offset": 12, - "end_offset": 21, - "type": "shingle", - "position": 2 - } - ] -} ----- -//// - -[[analysis-shingle-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`shingle` filter to configure a new <>. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "standard_shingle": { - "tokenizer": "standard", - "filter": [ "shingle" ] - } - } - } - } -} ----- - -[[analysis-shingle-tokenfilter-configure-parms]] -==== Configurable parameters - -`max_shingle_size`:: -(Optional, integer) -Maximum number of tokens to concatenate when creating shingles. Defaults to `2`. -+ -NOTE: This value cannot be lower than the `min_shingle_size` argument, which -defaults to `2`. The difference between this value and the `min_shingle_size` -argument cannot exceed the <> -index-level setting, which defaults to `3`. - -`min_shingle_size`:: -(Optional, integer) -Minimum number of tokens to concatenate when creating shingles. Defaults to `2`. -+ -NOTE: This value cannot exceed the `max_shingle_size` argument, which defaults -to `2`. The difference between the `max_shingle_size` argument and this value -cannot exceed the <> -index-level setting, which defaults to `3`. - -`output_unigrams`:: -(Optional, Boolean) -If `true`, the output includes the original input tokens. If `false`, the output -only includes shingles; the original input tokens are removed. Defaults to -`true`. - -`output_unigrams_if_no_shingles`:: -If `true`, the output includes the original input tokens only if no shingles are -produced; if shingles are produced, the output only includes shingles. Defaults -to `false`. -+ -IMPORTANT: If both this and the `output_unigrams` parameter are `true`, only the -`output_unigrams` argument is used. - -`token_separator`:: -(Optional, string) -Separator used to concatenate adjacent tokens to form a shingle. Defaults to a -space (`" "`). - -`filler_token`:: -+ --- -(Optional, string) -String used in shingles as a replacement for empty positions that do not contain -a token. This filler token is only used in shingles, not original unigrams. -Defaults to an underscore (`_`). - -Some token filters, such as the `stop` filter, create empty positions when -removing stop words with a position increment greater than one. - -.*Example* -[%collapsible] -==== -In the following <> request, the `stop` filter -removes the stop word `a` from `fox jumps a lazy dog`, creating an empty -position. 
The subsequent `shingle` filter replaces this empty position with a -plus sign (`+`) in shingles. - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "whitespace", - "filter": [ - { - "type": "stop", - "stopwords": [ "a" ] - }, - { - "type": "shingle", - "filler_token": "+" - } - ], - "text": "fox jumps a lazy dog" -} ----- - -The filter produces the following tokens: - -[source,text] ----- -[ fox, fox jumps, jumps, jumps +, + lazy, lazy, lazy dog, dog ] ----- - -//// -[source, console-result] ----- -{ - "tokens" : [ - { - "token" : "fox", - "start_offset" : 0, - "end_offset" : 3, - "type" : "word", - "position" : 0 - }, - { - "token" : "fox jumps", - "start_offset" : 0, - "end_offset" : 9, - "type" : "shingle", - "position" : 0, - "positionLength" : 2 - }, - { - "token" : "jumps", - "start_offset" : 4, - "end_offset" : 9, - "type" : "word", - "position" : 1 - }, - { - "token" : "jumps +", - "start_offset" : 4, - "end_offset" : 12, - "type" : "shingle", - "position" : 1, - "positionLength" : 2 - }, - { - "token" : "+ lazy", - "start_offset" : 12, - "end_offset" : 16, - "type" : "shingle", - "position" : 2, - "positionLength" : 2 - }, - { - "token" : "lazy", - "start_offset" : 12, - "end_offset" : 16, - "type" : "word", - "position" : 3 - }, - { - "token" : "lazy dog", - "start_offset" : 12, - "end_offset" : 20, - "type" : "shingle", - "position" : 3, - "positionLength" : 2 - }, - { - "token" : "dog", - "start_offset" : 17, - "end_offset" : 20, - "type" : "word", - "position" : 4 - } - ] -} ----- -//// -==== --- - -[[analysis-shingle-tokenfilter-customize]] -==== Customize - -To customize the `shingle` filter, duplicate it to create the basis for a new -custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following <> request -uses a custom `shingle` filter, `my_shingle_filter`, to configure a new -<>. - -The `my_shingle_filter` filter uses a `min_shingle_size` of `2` and a -`max_shingle_size` of `5`, meaning it produces shingles of 2-5 words. -The filter also includes a `output_unigrams` argument of `false`, meaning that -only shingles are included in the output. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "en": { - "tokenizer": "standard", - "filter": [ "my_shingle_filter" ] - } - }, - "filter": { - "my_shingle_filter": { - "type": "shingle", - "min_shingle_size": 2, - "max_shingle_size": 5, - "output_unigrams": false - } - } - } - } -} ----- diff --git a/docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc deleted file mode 100644 index d8300288c9f4b..0000000000000 --- a/docs/reference/analysis/tokenfilters/snowball-tokenfilter.asciidoc +++ /dev/null @@ -1,39 +0,0 @@ -[[analysis-snowball-tokenfilter]] -=== Snowball token filter -++++ -Snowball -++++ - -A filter that stems words using a Snowball-generated stemmer. The -`language` parameter controls the stemmer with the following available -values: `Arabic`, `Armenian`, `Basque`, `Catalan`, `Danish`, `Dutch`, `English`, -`Estonian`, `Finnish`, `French`, `German`, `German2`, `Hungarian`, `Italian`, `Irish`, `Kp`, -`Lithuanian`, `Lovins`, `Norwegian`, `Porter`, `Portuguese`, `Romanian`, -`Russian`, `Serbian`, `Spanish`, `Swedish`, `Turkish`. 
- -deprecated:[8.16.0, `Kp` and `Lovins` support will be removed in a future version] - -For example: - -[source,console] --------------------------------------------------- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "standard", - "filter": [ "lowercase", "my_snow" ] - } - }, - "filter": { - "my_snow": { - "type": "snowball", - "language": "English" - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/stemmer-override-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/stemmer-override-tokenfilter.asciidoc deleted file mode 100644 index 7d070a94cb0d1..0000000000000 --- a/docs/reference/analysis/tokenfilters/stemmer-override-tokenfilter.asciidoc +++ /dev/null @@ -1,80 +0,0 @@ -[[analysis-stemmer-override-tokenfilter]] -=== Stemmer override token filter -++++ -Stemmer override -++++ - -Overrides stemming algorithms, by applying a custom mapping, then -protecting these terms from being modified by stemmers. Must be placed -before any stemming filters. - -Rules are mappings in the form of `token1[, ..., tokenN] => override`. - -[cols="<,<",options="header",] -|======================================================================= -|Setting |Description -|`rules` |A list of mapping rules to use. - -|`rules_path` |A path (either relative to `config` location, or -absolute) to a list of mappings. -|======================================================================= - -Here is an example: - -[source,console] --------------------------------------------------- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "standard", - "filter": [ "lowercase", "custom_stems", "porter_stem" ] - } - }, - "filter": { - "custom_stems": { - "type": "stemmer_override", - "rules_path": "analysis/stemmer_override.txt" - } - } - } - } -} --------------------------------------------------- - -Where the file looks like: - -[source,stemmer_override] --------------------------------------------------- -include::{elasticsearch-root}/docs/src/test/cluster/config/analysis/stemmer_override.txt[] --------------------------------------------------- - -You can also define the overrides rules inline: - -[source,console] --------------------------------------------------- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "standard", - "filter": [ "lowercase", "custom_stems", "porter_stem" ] - } - }, - "filter": { - "custom_stems": { - "type": "stemmer_override", - "rules": [ - "running, runs => run", - "stemmer => stemmer" - ] - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc deleted file mode 100644 index d9e2120afe6d1..0000000000000 --- a/docs/reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc +++ /dev/null @@ -1,287 +0,0 @@ -[[analysis-stemmer-tokenfilter]] -=== Stemmer token filter -++++ -Stemmer -++++ - -Provides <> for several languages, -some with additional variants. For a list of supported languages, see the -<> parameter. - -When not customized, the filter uses the -https://snowballstem.org/algorithms/porter/stemmer.html[porter stemming -algorithm] for English. 
- -[[analysis-stemmer-tokenfilter-analyze-ex]] -==== Example - -The following analyze API request uses the `stemmer` filter's default porter -stemming algorithm to stem `the foxes jumping quickly` to `the fox jump -quickli`: - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "standard", - "filter": [ "stemmer" ], - "text": "the foxes jumping quickly" -} ----- - -The filter produces the following tokens: - -[source,text] ----- -[ the, fox, jump, quickli ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "the", - "start_offset": 0, - "end_offset": 3, - "type": "", - "position": 0 - }, - { - "token": "fox", - "start_offset": 4, - "end_offset": 9, - "type": "", - "position": 1 - }, - { - "token": "jump", - "start_offset": 10, - "end_offset": 17, - "type": "", - "position": 2 - }, - { - "token": "quickli", - "start_offset": 18, - "end_offset": 25, - "type": "", - "position": 3 - } - ] -} ----- -//// - -[[analysis-stemmer-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`stemmer` filter to configure a new <>. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "whitespace", - "filter": [ "stemmer" ] - } - } - } - } -} ----- - -[role="child_attributes"] -[[analysis-stemmer-tokenfilter-configure-parms]] -==== Configurable parameters - -[[analysis-stemmer-tokenfilter-language-parm]] -`language`:: -(Optional, string) -Language-dependent stemming algorithm used to stem tokens. If both this and the -`name` parameter are specified, the `language` parameter argument is used. -+ -[%collapsible%open] -.Valid values for `language` -==== -Valid values are sorted by language. Defaults to -https://snowballstem.org/algorithms/porter/stemmer.html[*`english`*]. -Recommended algorithms are *bolded*. 
- -Arabic:: -{lucene-analysis-docs}/ar/ArabicStemmer.html[*`arabic`*] - -Armenian:: -https://snowballstem.org/algorithms/armenian/stemmer.html[*`armenian`*] - -Basque:: -https://snowballstem.org/algorithms/basque/stemmer.html[*`basque`*] - -Bengali:: -https://www.tandfonline.com/doi/abs/10.1080/02564602.1993.11437284[*`bengali`*] - -Brazilian Portuguese:: -{lucene-analysis-docs}/br/BrazilianStemmer.html[*`brazilian`*] - -Bulgarian:: -http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf[*`bulgarian`*] - -Catalan:: -https://snowballstem.org/algorithms/catalan/stemmer.html[*`catalan`*] - -Czech:: -https://dl.acm.org/doi/10.1016/j.ipm.2009.06.001[*`czech`*] - -Danish:: -https://snowballstem.org/algorithms/danish/stemmer.html[*`danish`*] - -Dutch:: -https://snowballstem.org/algorithms/dutch/stemmer.html[*`dutch`*], -https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`] deprecated:[8.16.0, `dutch_kp` will be removed in a future version] - -English:: -https://snowballstem.org/algorithms/porter/stemmer.html[*`english`*], -https://ciir.cs.umass.edu/pubfiles/ir-35.pdf[`light_english`], -https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`] deprecated:[8.16.0, `lovins` will be removed in a future version], -https://www.researchgate.net/publication/220433848_How_effective_is_suffixing[`minimal_english`], -https://snowballstem.org/algorithms/english/stemmer.html[`porter2`], -{lucene-analysis-docs}/en/EnglishPossessiveFilter.html[`possessive_english`] - -Estonian:: -https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/tartarus/snowball/ext/EstonianStemmer.html[*`estonian`*] - -Finnish:: -https://snowballstem.org/algorithms/finnish/stemmer.html[*`finnish`*], -http://clef.isti.cnr.it/2003/WN_web/22.pdf[`light_finnish`] - -French:: -https://dl.acm.org/citation.cfm?id=1141523[*`light_french`*], -https://snowballstem.org/algorithms/french/stemmer.html[`french`], -https://dl.acm.org/citation.cfm?id=318984[`minimal_french`] - -Galician:: -http://bvg.udc.es/recursos_lingua/stemming.jsp[*`galician`*], -http://bvg.udc.es/recursos_lingua/stemming.jsp[`minimal_galician`] (Plural step only) - -German:: -https://dl.acm.org/citation.cfm?id=1141523[*`light_german`*], -https://snowballstem.org/algorithms/german/stemmer.html[`german`], -http://members.unine.ch/jacques.savoy/clef/morpho.pdf[`minimal_german`] - -Greek:: -https://sais.se/mthprize/2007/ntais2007.pdf[*`greek`*] - -Hindi:: -http://computing.open.ac.uk/Sites/EACLSouthAsia/Papers/p6-Ramanathan.pdf[*`hindi`*] - -Hungarian:: -https://snowballstem.org/algorithms/hungarian/stemmer.html[*`hungarian`*], -https://dl.acm.org/citation.cfm?id=1141523&dl=ACM&coll=DL&CFID=179095584&CFTOKEN=80067181[`light_hungarian`] - -Indonesian:: -http://www.illc.uva.nl/Publications/ResearchReports/MoL-2003-02.text.pdf[*`indonesian`*] - -Irish:: -https://snowballstem.org/otherapps/oregan/[*`irish`*] - -Italian:: -https://www.ercim.eu/publication/ws-proceedings/CLEF2/savoy.pdf[*`light_italian`*], -https://snowballstem.org/algorithms/italian/stemmer.html[`italian`] - -Kurdish (Sorani):: -{lucene-analysis-docs}/ckb/SoraniStemmer.html[*`sorani`*] - -Latvian:: -{lucene-analysis-docs}/lv/LatvianStemmer.html[*`latvian`*] - -Lithuanian:: -https://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_5_3/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/stem_ISO_8859_1.sbl?view=markup[*`lithuanian`*] - -Norwegian (Bokmål):: -https://snowballstem.org/algorithms/norwegian/stemmer.html[*`norwegian`*], 
-{lucene-analysis-docs}/no/NorwegianLightStemmer.html[*`light_norwegian`*], -{lucene-analysis-docs}/no/NorwegianMinimalStemmer.html[`minimal_norwegian`] - -Norwegian (Nynorsk):: -{lucene-analysis-docs}/no/NorwegianLightStemmer.html[*`light_nynorsk`*], -{lucene-analysis-docs}/no/NorwegianMinimalStemmer.html[`minimal_nynorsk`] - -Persian:: -{lucene-analysis-docs}/fa/PersianStemmer.html[*`persian`*] - -Portuguese:: -https://dl.acm.org/citation.cfm?id=1141523&dl=ACM&coll=DL&CFID=179095584&CFTOKEN=80067181[*`light_portuguese`*], -pass:macros[http://www.inf.ufrgs.br/~buriol/papers/Orengo_CLEF07.pdf[`minimal_portuguese`\]], -https://snowballstem.org/algorithms/portuguese/stemmer.html[`portuguese`], -https://www.inf.ufrgs.br/\~viviane/rslp/index.htm[`portuguese_rslp`] - -Romanian:: -https://snowballstem.org/algorithms/romanian/stemmer.html[*`romanian`*] - -Russian:: -https://snowballstem.org/algorithms/russian/stemmer.html[*`russian`*], -https://doc.rero.ch/lm.php?url=1000%2C43%2C4%2C20091209094227-CA%2FDolamic_Ljiljana_-_Indexing_and_Searching_Strategies_for_the_Russian_20091209.pdf[`light_russian`] - -Serbian:: -https://snowballstem.org/algorithms/serbian/stemmer.html[*`serbian`*] - -Spanish:: -https://www.ercim.eu/publication/ws-proceedings/CLEF2/savoy.pdf[*`light_spanish`*], -https://snowballstem.org/algorithms/spanish/stemmer.html[`spanish`] -https://www.wikilengua.org/index.php/Plural_(formaci%C3%B3n)[`spanish_plural`] - -Swedish:: -https://snowballstem.org/algorithms/swedish/stemmer.html[*`swedish`*], -http://clef.isti.cnr.it/2003/WN_web/22.pdf[`light_swedish`] - -Turkish:: -https://snowballstem.org/algorithms/turkish/stemmer.html[*`turkish`*] -==== - -`name`:: -An alias for the <> -parameter. If both this and the `language` parameter are specified, the -`language` parameter argument is used. - -[[analysis-stemmer-tokenfilter-customize]] -==== Customize - -To customize the `stemmer` filter, duplicate it to create the basis for a new -custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a custom `stemmer` filter that stems -words using the `light_german` algorithm: - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "my_stemmer" - ] - } - }, - "filter": { - "my_stemmer": { - "type": "stemmer", - "language": "light_german" - } - } - } - } -} ----- diff --git a/docs/reference/analysis/tokenfilters/stop-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/stop-tokenfilter.asciidoc deleted file mode 100644 index abba633b643dc..0000000000000 --- a/docs/reference/analysis/tokenfilters/stop-tokenfilter.asciidoc +++ /dev/null @@ -1,381 +0,0 @@ -[[analysis-stop-tokenfilter]] -=== Stop token filter -++++ -Stop -++++ - -Removes {wikipedia}/Stop_words[stop words] from a token -stream. - -When not customized, the filter removes the following English stop words by -default: - -`a`, `an`, `and`, `are`, `as`, `at`, `be`, `but`, `by`, `for`, `if`, `in`, -`into`, `is`, `it`, `no`, `not`, `of`, `on`, `or`, `such`, `that`, `the`, -`their`, `then`, `there`, `these`, `they`, `this`, `to`, `was`, `will`, `with` - -In addition to English, the `stop` filter supports predefined -<>. You can also specify your own stop words as an array or file. - -The `stop` filter uses Lucene's -{lucene-analysis-docs}/core/StopFilter.html[StopFilter]. 
- -[[analysis-stop-tokenfilter-analyze-ex]] -==== Example - -The following analyze API request uses the `stop` filter to remove the stop words -`a` and `the` from `a quick fox jumps over the lazy dog`: - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "standard", - "filter": [ "stop" ], - "text": "a quick fox jumps over the lazy dog" -} ----- - -The filter produces the following tokens: - -[source,text] ----- -[ quick, fox, jumps, over, lazy, dog ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "quick", - "start_offset": 2, - "end_offset": 7, - "type": "", - "position": 1 - }, - { - "token": "fox", - "start_offset": 8, - "end_offset": 11, - "type": "", - "position": 2 - }, - { - "token": "jumps", - "start_offset": 12, - "end_offset": 17, - "type": "", - "position": 3 - }, - { - "token": "over", - "start_offset": 18, - "end_offset": 22, - "type": "", - "position": 4 - }, - { - "token": "lazy", - "start_offset": 27, - "end_offset": 31, - "type": "", - "position": 6 - }, - { - "token": "dog", - "start_offset": 32, - "end_offset": 35, - "type": "", - "position": 7 - } - ] -} ----- -//// - -[[analysis-stop-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the `stop` -filter to configure a new <>. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "whitespace", - "filter": [ "stop" ] - } - } - } - } -} ----- - -[[analysis-stop-tokenfilter-configure-parms]] -==== Configurable parameters - -`stopwords`:: -+ --- -(Optional, string or array of strings) -Language value, such as `_arabic_` or `_thai_`. Defaults to -<>. - -Each language value corresponds to a predefined list of stop words in Lucene. -See <> for supported language -values and their stop words. - -Also accepts an array of stop words. - -For an empty list of stop words, use `_none_`. --- - -`stopwords_path`:: -+ --- -(Optional, string) -Path to a file that contains a list of stop words to remove. - -This path must be absolute or relative to the `config` location, and the file -must be UTF-8 encoded. Each stop word in the file must be separated by a line -break. --- - -`ignore_case`:: -(Optional, Boolean) -If `true`, stop word matching is case insensitive. For example, if `true`, a -stop word of `the` matches and removes `The`, `THE`, or `the`. Defaults to -`false`. - -`remove_trailing`:: -+ --- -(Optional, Boolean) -If `true`, the last token of a stream is removed if it's a stop word. Defaults -to `true`. - -This parameter should be `false` when using the filter with a -<>. This would ensure a query like -`green a` matches and suggests `green apple` while still removing other stop -words. --- - -[[analysis-stop-tokenfilter-customize]] -==== Customize - -To customize the `stop` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a custom case-insensitive `stop` -filter that removes stop words from the <> stop -words list: - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "default": { - "tokenizer": "whitespace", - "filter": [ "my_custom_stop_words_filter" ] - } - }, - "filter": { - "my_custom_stop_words_filter": { - "type": "stop", - "ignore_case": true - } - } - } - } -} ----- - -You can also specify your own list of stop words. 
For example, the following -request creates a custom case-insensitive `stop` filter that removes only the stop -words `and`, `is`, and `the`: - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "default": { - "tokenizer": "whitespace", - "filter": [ "my_custom_stop_words_filter" ] - } - }, - "filter": { - "my_custom_stop_words_filter": { - "type": "stop", - "ignore_case": true, - "stopwords": [ "and", "is", "the" ] - } - } - } - } -} ----- - -[[analysis-stop-tokenfilter-stop-words-by-lang]] -==== Stop words by language - -The following list contains supported language values for the `stopwords` -parameter and a link to their predefined stop words in Lucene. - -[[arabic-stop-words]] -`_arabic_`:: -{lucene-stop-word-link}/ar/stopwords.txt[Arabic stop words] - -[[armenian-stop-words]] -`_armenian_`:: -{lucene-stop-word-link}/hy/stopwords.txt[Armenian stop words] - -[[basque-stop-words]] -`_basque_`:: -{lucene-stop-word-link}/eu/stopwords.txt[Basque stop words] - -[[bengali-stop-words]] -`_bengali_`:: -{lucene-stop-word-link}/bn/stopwords.txt[Bengali stop words] - -[[brazilian-stop-words]] -`_brazilian_` (Brazilian Portuguese):: -{lucene-stop-word-link}/br/stopwords.txt[Brazilian Portuguese stop words] - -[[bulgarian-stop-words]] -`_bulgarian_`:: -{lucene-stop-word-link}/bg/stopwords.txt[Bulgarian stop words] - -[[catalan-stop-words]] -`_catalan_`:: -{lucene-stop-word-link}/ca/stopwords.txt[Catalan stop words] - -[[cjk-stop-words]] -`_cjk_` (Chinese, Japanese, and Korean):: -{lucene-stop-word-link}/cjk/stopwords.txt[CJK stop words] - -[[czech-stop-words]] -`_czech_`:: -{lucene-stop-word-link}/cz/stopwords.txt[Czech stop words] - -[[danish-stop-words]] -`_danish_`:: -{lucene-stop-word-link}/snowball/danish_stop.txt[Danish stop words] - -[[dutch-stop-words]] -`_dutch_`:: -{lucene-stop-word-link}/snowball/dutch_stop.txt[Dutch stop words] - -[[english-stop-words]] -`_english_`:: -{lucene-gh-main-link}/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java#L48[English stop words] - -[[estonian-stop-words]] -`_estonian_`:: -{lucene-stop-word-link}/et/stopwords.txt[Estonian stop words] - -[[finnish-stop-words]] -`_finnish_`:: -{lucene-stop-word-link}/snowball/finnish_stop.txt[Finnish stop words] - -[[french-stop-words]] -`_french_`:: -{lucene-stop-word-link}/snowball/french_stop.txt[French stop words] - -[[galician-stop-words]] -`_galician_`:: -{lucene-stop-word-link}/gl/stopwords.txt[Galician stop words] - -[[german-stop-words]] -`_german_`:: -{lucene-stop-word-link}/snowball/german_stop.txt[German stop words] - -[[greek-stop-words]] -`_greek_`:: -{lucene-stop-word-link}/el/stopwords.txt[Greek stop words] - -[[hindi-stop-words]] -`_hindi_`:: -{lucene-stop-word-link}/hi/stopwords.txt[Hindi stop words] - -[[hungarian-stop-words]] -`_hungarian_`:: -{lucene-stop-word-link}/snowball/hungarian_stop.txt[Hungarian stop words] - -[[indonesian-stop-words]] -`_indonesian_`:: -{lucene-stop-word-link}/id/stopwords.txt[Indonesian stop words] - -[[irish-stop-words]] -`_irish_`:: -{lucene-stop-word-link}/ga/stopwords.txt[Irish stop words] - -[[italian-stop-words]] -`_italian_`:: -{lucene-stop-word-link}/snowball/italian_stop.txt[Italian stop words] - -[[latvian-stop-words]] -`_latvian_`:: -{lucene-stop-word-link}/lv/stopwords.txt[Latvian stop words] - -[[lithuanian-stop-words]] -`_lithuanian_`:: -{lucene-stop-word-link}/lt/stopwords.txt[Lithuanian stop words] - -[[norwegian-stop-words]] -`_norwegian_`:: 
-{lucene-stop-word-link}/snowball/norwegian_stop.txt[Norwegian stop words] - -[[persian-stop-words]] -`_persian_`:: -{lucene-stop-word-link}/fa/stopwords.txt[Persian stop words] - -[[portuguese-stop-words]] -`_portuguese_`:: -{lucene-stop-word-link}/snowball/portuguese_stop.txt[Portuguese stop words] - -[[romanian-stop-words]] -`_romanian_`:: -{lucene-stop-word-link}/ro/stopwords.txt[Romanian stop words] - -[[russian-stop-words]] -`_russian_`:: -{lucene-stop-word-link}/snowball/russian_stop.txt[Russian stop words] - -[[serbian-stop-words]] -`_serbian_`:: -{lucene-stop-word-link}/sr/stopwords.txt[Serbian stop words] - -[[sorani-stop-words]] -`_sorani_`:: -{lucene-stop-word-link}/ckb/stopwords.txt[Sorani stop words] - -[[spanish-stop-words]] -`_spanish_`:: -{lucene-stop-word-link}/snowball/spanish_stop.txt[Spanish stop words] - -[[swedish-stop-words]] -`_swedish_`:: -{lucene-stop-word-link}/snowball/swedish_stop.txt[Swedish stop words] - -[[thai-stop-words]] -`_thai_`:: -{lucene-stop-word-link}/th/stopwords.txt[Thai stop words] - -[[turkish-stop-words]] -`_turkish_`:: -{lucene-stop-word-link}/tr/stopwords.txt[Turkish stop words] diff --git a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc deleted file mode 100644 index f0fa4f30fd83f..0000000000000 --- a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc +++ /dev/null @@ -1,224 +0,0 @@ -[[analysis-synonym-graph-tokenfilter]] -=== Synonym graph token filter -++++ -Synonym graph -++++ - -The `synonym_graph` token filter allows to easily handle <>, -including multi-word synonyms correctly during the analysis process. - -In order to properly handle multi-word synonyms this token filter -creates a <> during processing. For more -information on this topic and its various complexities, please read the -http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs] blog post. - -["NOTE",id="synonym-graph-index-note"] -=============================== -This token filter is designed to be used as part of a search analyzer -only. If you want to apply synonyms during indexing please use the -standard <>. -=============================== - -[discrete] -[[analysis-synonym-graph-define-synonyms]] -==== Define synonyms sets - -include::synonyms-format.asciidoc[] - -[discrete] -[[analysis-synonym-graph-configure-sets]] -==== Configure synonyms sets - -Synonyms can be configured using the <>, a <>, or directly <> in the token filter configuration. -See <> for more details on each option. - -Use `synonyms_set` configuration option to provide a synonym set created via Synonyms Management APIs: - -[source,JSON] ----- - "filter": { - "synonyms_filter": { - "type": "synonym_graph", - "synonyms_set": "my-synonym-set", - "updateable": true - } - } ----- - -[WARNING] -====== -Synonyms sets must exist before they can be added to indices. -If an index is created referencing a nonexistent synonyms set, the index will remain in a partially created and inoperable state. -The only way to recover from this scenario is to ensure the synonyms set exists then either delete and re-create the index, or close and re-open the index. 
-====== - -Use `synonyms_path` to provide a synonym file : - -[source,JSON] ----- - "filter": { - "synonyms_filter": { - "type": "synonym_graph", - "synonyms_path": "analysis/synonym-set.txt" - } - } ----- - -The above configures a `synonym` filter, with a path of -`analysis/synonym-set.txt` (relative to the `config` location). - -Use `synonyms` to define inline synonyms: - -[source,JSON] ----- - "filter": { - "synonyms_filter": { - "type": "synonym_graph", - "synonyms": ["pc => personal computer", "computer, pc, laptop"] - } - } ----- - -Additional settings are: - -* `updateable` (defaults to `false`). If `true` allows -<> search analyzers to pick up -changes to synonym files. Only to be used for search analyzers. -* `expand` (defaults to `true`). -Expands definitions for equivalent synonym rules. -See <>. -* `lenient` (defaults to the value of the `updateable` setting). -If `true` ignores errors while parsing the synonym rules. -It is important to note that only those synonym rules which cannot get parsed are ignored. -See <> for an example of `lenient` behaviour for invalid synonym rules. - -[discrete] -[[synonym-graph-tokenizer-expand-equivalent-synonyms]] -===== `expand` equivalent synonym rules - -The `expand` parameter controls whether to expand equivalent synonym rules. -Consider a synonym defined like: - -`foo, bar, baz` - -Using `expand: true`, the synonym rule would be expanded into: - -``` -foo => foo -foo => bar -foo => baz -bar => foo -bar => bar -bar => baz -baz => foo -baz => bar -baz => baz -``` - -When `expand` is set to `false`, the synonym rule is not expanded and the first synonym is treated as the canonical representation. The synonym would be equivalent to: - -``` -foo => foo -bar => foo -baz => foo -``` - -The `expand` parameter does not affect explicit synonym rules, like `foo, bar => baz`. - -[discrete] -[[synonym-graph-tokenizer-ignore_case-deprecated]] -===== `tokenizer` and `ignore_case` are deprecated - -The `tokenizer` parameter controls the tokenizers that will be used to -tokenize the synonym, this parameter is for backwards compatibility for indices that created before 6.0. -The `ignore_case` parameter works with `tokenizer` parameter only. - -[discrete] -[[analysis-synonym-graph-analizers-configure]] -==== Configure analyzers with synonym graph token filters - -To apply synonyms, you will need to include a synonym graph token filter into an analyzer: - -[source,JSON] ----- - "analyzer": { - "my_analyzer": { - "type": "custom", - "tokenizer": "standard", - "filter": ["stemmer", "synonym_graph"] - } - } ----- - -[discrete] -[[analysis-synonym-graph-token-order]] -===== Token filters ordering - -Order is important for your token filters. -Text will be processed first through filters preceding the synonym filter before being processed by the synonym filter. - -{es} will also use the token filters preceding the synonym filter in a tokenizer chain to parse the entries in a synonym file or synonym set. -In the above example, the synonyms graph token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries. - -Because entries in the synonym map cannot have stacked positions, some token filters may cause issues here. -Token filters that produce multiple versions of a token may choose which version of the token to emit when parsing synonyms. -For example, `asciifolding` will only produce the folded version of the token. -Others, like `multiplexer`, `word_delimiter_graph` or `ngram` will throw an error. 
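To make the preceding point concrete, the following sketch shows a configuration that is expected to fail. It is illustrative rather than taken from this page: the index, analyzer, and filter names and the inline synonym rule are assumptions. Because `word_delimiter_graph` is placed before the `synonym_graph` filter, it would be used to parse the synonym rules, which is not supported, so creating an index with this analyzer should be rejected with an error.

[source,console]
----
PUT /my-index-000001
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_broken_analyzer": {
          "tokenizer": "whitespace",
          "filter": [ "word_delimiter_graph", "my_synonyms" ]
        }
      },
      "filter": {
        "my_synonyms": {
          "type": "synonym_graph",
          "synonyms": [ "pc => personal computer" ]
        }
      }
    }
  }
}
----

The multiplexer-based workaround described next keeps the multi-token filter and the synonym filter in separate branches and avoids this problem.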
- -If you need to build analyzers that include both multi-token filters and synonym filters, consider using the <> filter, with the multi-token filters in one branch and the synonym filter in the other. - -[discrete] -[[synonym-graph-tokenizer-stop-token-filter]] -===== Synonyms and `stop` token filters - -Synonyms and <> interact with each other in the following ways: - -[discrete] -====== Stop token filter *before* synonym token filter - -Stop words will be removed from the synonym rule definition. -This can can cause errors on the synonym rule. - -[WARNING] -==== -If `lenient` is set to `false`, invalid synonym rules can cause errors when applying analyzer changes. -For reloadable analyzers, this prevents reloading and applying changes. -You must correct errors in the synonym rules and reload the analyzer. - -When `lenient` is set to `false`, an index with invalid synonym rules cannot be reopened, making it inoperable when: - -* A node containing the index starts -* The index is opened from a closed state -* A node restart occurs (which reopens the node assigned shards) -==== - -For *explicit synonym rules* like `foo, bar => baz` with a stop filter that removes `bar`: - -- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the left hand side of the synonym rule. -- If `lenient` is set to `true`, the rule `foo => baz` will be added and `bar => baz` will be ignored. - -If the stop filter removed `baz` instead: - -- If `lenient` is set to `false`, an error will be raised as `baz` would be removed from the right hand side of the synonym rule. -- If `lenient` is set to `true`, the synonym will have no effect as the target word is removed. - -For *equivalent synonym rules* like `foo, bar, baz` and `expand: true, with a stop filter that removes `bar`: - -- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the synonym rule. -- If `lenient` is set to `true`, the synonyms added would be equivalent to the following synonym rules, which do not contain the removed word: - -``` -foo => foo -foo => baz -baz => foo -baz => baz -``` - -[discrete] -====== Stop token filter *after* synonym token filter - -The stop filter will remove the terms from the resulting synonym expansion. - -For example, a synonym rule like `foo, bar => baz` and a stop filter that removes `baz` will get no matches for `foo` or `bar`, as both would get expanded to `baz` which is removed by the stop filter. - -If the stop filter removed `foo` instead, then searching for `foo` would get expanded to `baz`, which is not removed by the stop filter thus potentially providing matches for `baz`. diff --git a/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc deleted file mode 100644 index b0020a1120fc0..0000000000000 --- a/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc +++ /dev/null @@ -1,212 +0,0 @@ -[[analysis-synonym-tokenfilter]] -=== Synonym token filter -++++ -Synonym -++++ - -The `synonym` token filter allows to easily handle <> during the -analysis process. - -[discrete] -[[analysis-synonym-define-synonyms]] -==== Define synonyms sets - -include::synonyms-format.asciidoc[] - -[discrete] -[[analysis-synonym-configure-sets]] -==== Configure synonyms sets - -Synonyms can be configured using the <>, a <>, or directly <> in the token filter configuration. -See <> for more details on each option. 
- -Use `synonyms_set` configuration option to provide a synonym set created via Synonyms Management APIs: - -[source,JSON] ----- - "filter": { - "synonyms_filter": { - "type": "synonym", - "synonyms_set": "my-synonym-set", - "updateable": true - } - } ----- - -[WARNING] -====== -Synonyms sets must exist before they can be added to indices. -If an index is created referencing a nonexistent synonyms set, the index will remain in a partially created and inoperable state. -The only way to recover from this scenario is to ensure the synonyms set exists then either delete and re-create the index, or close and re-open the index. -====== - -Use `synonyms_path` to provide a synonym file : - -[source,JSON] ----- - "filter": { - "synonyms_filter": { - "type": "synonym", - "synonyms_path": "analysis/synonym-set.txt" - } - } ----- - -The above configures a `synonym` filter, with a path of -`analysis/synonym-set.txt` (relative to the `config` location). - -Use `synonyms` to define inline synonyms: - -[source,JSON] ----- - "filter": { - "synonyms_filter": { - "type": "synonym", - "synonyms": ["pc => personal computer", "computer, pc, laptop"] - } - } ----- - -Additional settings are: - -* `updateable` (defaults to `false`). If `true` allows -<> search analyzers to pick up -changes to synonym files. Only to be used for search analyzers. -* `expand` (defaults to `true`). -Expands definitions for equivalent synonym rules. -See <>. -* `lenient` (defaults to the value of the `updateable` setting). -If `true` ignores errors while parsing the synonym rules. -It is important to note that only those synonym rules which cannot get parsed are ignored. -See <> for an example of `lenient` behaviour for invalid synonym rules. - -[discrete] -[[synonym-tokenizer-expand-equivalent-synonyms]] -===== `expand` equivalent synonym rules - -The `expand` parameter controls whether to expand equivalent synonym rules. -Consider a synonym defined like: - -`foo, bar, baz` - -Using `expand: true`, the synonym rule would be expanded into: - -``` -foo => foo -foo => bar -foo => baz -bar => foo -bar => bar -bar => baz -baz => foo -baz => bar -baz => baz -``` - -When `expand` is set to `false`, the synonym rule is not expanded and the first synonym is treated as the canonical representation. The synonym would be equivalent to: - -``` -foo => foo -bar => foo -baz => foo -``` - -The `expand` parameter does not affect explicit synonym rules, like `foo, bar => baz`. - -[discrete] -[[synonym-tokenizer-ignore_case-deprecated]] -===== `tokenizer` and `ignore_case` are deprecated - -The `tokenizer` parameter controls the tokenizers that will be used to -tokenize the synonym, this parameter is for backwards compatibility for indices that created before 6.0. -The `ignore_case` parameter works with `tokenizer` parameter only. - -[discrete] -[[analysis-synonym-analizers-configure]] -==== Configure analyzers with synonym token filters - -To apply synonyms, you will need to include a synonym token filters into an analyzer: - -[source,JSON] ----- - "analyzer": { - "my_analyzer": { - "type": "custom", - "tokenizer": "standard", - "filter": ["stemmer", "synonym"] - } - } ----- - -[discrete] -[[analysis-synonym-token-order]] -===== Token filters ordering - -Order is important for your token filters. -Text will be processed first through filters preceding the synonym filter before being processed by the synonym filter. 
- -{es} will also use the token filters preceding the synonym filter in a tokenizer chain to parse the entries in a synonym file or synonym set. -In the above example, the synonyms token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries. - -Because entries in the synonym map cannot have stacked positions, some token filters may cause issues here. -Token filters that produce multiple versions of a token may choose which version of the token to emit when parsing synonyms. -For example, `asciifolding` will only produce the folded version of the token. -Others, like `multiplexer`, `word_delimiter_graph` or `ngram` will throw an error. - -If you need to build analyzers that include both multi-token filters and synonym filters, consider using the <> filter, with the multi-token filters in one branch and the synonym filter in the other. - -[discrete] -[[synonym-tokenizer-stop-token-filter]] -===== Synonyms and `stop` token filters - -Synonyms and <> interact with each other in the following ways: - -[discrete] -====== Stop token filter *before* synonym token filter - -Stop words will be removed from the synonym rule definition. -This can can cause errors on the synonym rule. - -[WARNING] -==== -If `lenient` is set to `false`, invalid synonym rules can cause errors when applying analyzer changes. -For reloadable analyzers, this prevents reloading and applying changes. -You must correct errors in the synonym rules and reload the analyzer. - -When `lenient` is set to `false`, an index with invalid synonym rules cannot be reopened, making it inoperable when: - -* A node containing the index starts -* The index is opened from a closed state -* A node restart occurs (which reopens the node assigned shards) -==== - -For *explicit synonym rules* like `foo, bar => baz` with a stop filter that removes `bar`: - -- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the left hand side of the synonym rule. -- If `lenient` is set to `true`, the rule `foo => baz` will be added and `bar => baz` will be ignored. - -If the stop filter removed `baz` instead: - -- If `lenient` is set to `false`, an error will be raised as `baz` would be removed from the right hand side of the synonym rule. -- If `lenient` is set to `true`, the synonym will have no effect as the target word is removed. - -For *equivalent synonym rules* like `foo, bar, baz` and `expand: true, with a stop filter that removes `bar`: - -- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the synonym rule. -- If `lenient` is set to `true`, the synonyms added would be equivalent to the following synonym rules, which do not contain the removed word: - -``` -foo => foo -foo => baz -baz => foo -baz => baz -``` - -[discrete] -====== Stop token filter *after* synonym token filter - -The stop filter will remove the terms from the resulting synonym expansion. - -For example, a synonym rule like `foo, bar => baz` and a stop filter that removes `baz` will get no matches for `foo` or `bar`, as both would get expanded to `baz` which is removed by the stop filter. - -If the stop filter removed `foo` instead, then searching for `foo` would get expanded to `baz`, which is not removed by the stop filter thus potentially providing matches for `baz`. 
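The multiplexer approach recommended above for combining multi-token filters with synonym filters is easier to follow with a concrete configuration. The sketch below is an illustration, not part of the original page: the index, analyzer, and filter names and the inline synonym rules are assumptions. `asciifolding` runs in one branch of the multiplexer and the synonym filter in the other, so the synonym rules are never parsed through the multi-token filter, while both branches emit tokens at the same positions.

[source,console]
----
PUT /my-index-000001
{
  "settings": {
    "analysis": {
      "filter": {
        "my_synonyms": {
          "type": "synonym",
          "synonyms": [ "pc => personal computer", "computer, pc, laptop" ]
        },
        "my_multiplexer": {
          "type": "multiplexer",
          "filters": [ "asciifolding", "my_synonyms" ]
        }
      },
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "filter": [ "lowercase", "my_multiplexer" ]
        }
      }
    }
  }
}
----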
diff --git a/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc b/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc deleted file mode 100644 index e780c24963312..0000000000000 --- a/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc +++ /dev/null @@ -1,44 +0,0 @@ -Synonyms in a synonyms set are defined using *synonym rules*. -Each synonym rule contains words that are synonyms. - -You can use two formats to define synonym rules: Solr and WordNet. - -[discrete] -===== Solr format - -This format uses two different definitions: - -* Equivalent synonyms: Define groups of words that are equivalent. Words are separated by commas. Example: -+ -[source,synonyms] ----- -ipod, i-pod, i pod -computer, pc, laptop ----- -* Explicit synonyms: Matches a group of words to other words. Words on the left hand side of the rule definition are expanded into all the possibilities described on the right hand side. Example: -+ -[source,synonyms] ----- -personal computer => pc -sea biscuit, sea biscit => seabiscuit ----- - -[discrete] -===== WordNet format - -https://wordnet.princeton.edu/[WordNet] defines synonyms sets spanning multiple lines. Each line contains the following information: - -* Synonyms set numeric identifier -* Ordinal of the synonym in the synonyms set -* Synonym word -* Word type identifier: Noun (n), verb (v), adjective (a) or adverb (b). -* Depth of the word in the synonym net - -The following example defines a synonym set for the words "come", "advance" and "approach": - -[source,synonyms] ----- -s(100000002,1,'come',v,1,0). -s(100000002,2,'advance',v,1,0). -s(100000002,3,'approach',v,1,0)."""; ----- diff --git a/docs/reference/analysis/tokenfilters/trim-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/trim-tokenfilter.asciidoc deleted file mode 100644 index ea39d8d4a4c94..0000000000000 --- a/docs/reference/analysis/tokenfilters/trim-tokenfilter.asciidoc +++ /dev/null @@ -1,113 +0,0 @@ -[[analysis-trim-tokenfilter]] -=== Trim token filter -++++ -Trim -++++ - -Removes leading and trailing whitespace from each token in a stream. While this -can change the length of a token, the `trim` filter does _not_ change a token's -offsets. - -The `trim` filter uses Lucene's -https://lucene.apache.org/core/{lucene_version_path}/analysis/common/org/apache/lucene/analysis/miscellaneous/TrimFilter.html[TrimFilter]. - -[TIP] -==== -Many commonly used tokenizers, such as the -<> or -<> tokenizer, remove whitespace by -default. When using these tokenizers, you don't need to add a separate `trim` -filter. -==== - -[[analysis-trim-tokenfilter-analyze-ex]] -==== Example - -To see how the `trim` filter works, you first need to produce a token -containing whitespace. - -The following <> request uses the -<> tokenizer to produce a token for -`" fox "`. - -[source,console] ----- -GET _analyze -{ - "tokenizer" : "keyword", - "text" : " fox " -} ----- - -The API returns the following response. Note the `" fox "` token contains the -original text's whitespace. Note that despite changing the token's length, the -`start_offset` and `end_offset` remain the same. - -[source,console-result] ----- -{ - "tokens": [ - { - "token": " fox ", - "start_offset": 0, - "end_offset": 5, - "type": "word", - "position": 0 - } - ] -} ----- - -To remove the whitespace, add the `trim` filter to the previous analyze API -request. - -[source,console] ----- -GET _analyze -{ - "tokenizer" : "keyword", - "filter" : ["trim"], - "text" : " fox " -} ----- - -The API returns the following response. 
The returned `fox` token does not -include any leading or trailing whitespace. - -[source,console-result] ----- -{ - "tokens": [ - { - "token": "fox", - "start_offset": 0, - "end_offset": 5, - "type": "word", - "position": 0 - } - ] -} ----- - -[[analysis-trim-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the `trim` -filter to configure a new <>. - -[source,console] ----- -PUT trim_example -{ - "settings": { - "analysis": { - "analyzer": { - "keyword_trim": { - "tokenizer": "keyword", - "filter": [ "trim" ] - } - } - } - } -} ----- diff --git a/docs/reference/analysis/tokenfilters/truncate-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/truncate-tokenfilter.asciidoc deleted file mode 100644 index a77387d5fd481..0000000000000 --- a/docs/reference/analysis/tokenfilters/truncate-tokenfilter.asciidoc +++ /dev/null @@ -1,148 +0,0 @@ -[[analysis-truncate-tokenfilter]] -=== Truncate token filter -++++ -Truncate -++++ - -Truncates tokens that exceed a specified character limit. This limit defaults to -`10` but can be customized using the `length` parameter. - -For example, you can use the `truncate` filter to shorten all tokens to -`3` characters or fewer, changing `jumping fox` to `jum fox`. - -This filter uses Lucene's -{lucene-analysis-docs}/miscellaneous/TruncateTokenFilter.html[TruncateTokenFilter]. - -[[analysis-truncate-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `truncate` filter -to shorten tokens that exceed 10 characters in -`the quinquennial extravaganza carried on`: - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer" : "whitespace", - "filter" : ["truncate"], - "text" : "the quinquennial extravaganza carried on" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ the, quinquenni, extravagan, carried, on ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "the", - "start_offset" : 0, - "end_offset" : 3, - "type" : "word", - "position" : 0 - }, - { - "token" : "quinquenni", - "start_offset" : 4, - "end_offset" : 16, - "type" : "word", - "position" : 1 - }, - { - "token" : "extravagan", - "start_offset" : 17, - "end_offset" : 29, - "type" : "word", - "position" : 2 - }, - { - "token" : "carried", - "start_offset" : 30, - "end_offset" : 37, - "type" : "word", - "position" : 3 - }, - { - "token" : "on", - "start_offset" : 38, - "end_offset" : 40, - "type" : "word", - "position" : 4 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-truncate-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`truncate` filter to configure a new -<>. - -[source,console] --------------------------------------------------- -PUT custom_truncate_example -{ - "settings" : { - "analysis" : { - "analyzer" : { - "standard_truncate" : { - "tokenizer" : "standard", - "filter" : ["truncate"] - } - } - } - } -} --------------------------------------------------- - -[[analysis-truncate-tokenfilter-configure-parms]] -==== Configurable parameters - -`length`:: -(Optional, integer) -Character limit for each token. Tokens exceeding this limit are truncated. -Defaults to `10`. 
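Before creating a custom filter, you can try the `length` parameter directly in an analyze API request by supplying the filter as an inline definition. This request is a suggestion rather than part of the original page; per the example in the introduction above, shortening tokens to 3 characters should turn `jumping fox` into `jum` and `fox`. The next section shows how to register an equivalent filter permanently in the index settings.

[source,console]
----
GET _analyze
{
  "tokenizer": "whitespace",
  "filter": [ { "type": "truncate", "length": 3 } ],
  "text": "jumping fox"
}
----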
- -[[analysis-truncate-tokenfilter-customize]] -==== Customize - -To customize the `truncate` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a custom `truncate` filter, -`5_char_trunc`, that shortens tokens to a `length` of `5` or fewer characters: - -[source,console] --------------------------------------------------- -PUT 5_char_words_example -{ - "settings": { - "analysis": { - "analyzer": { - "lowercase_5_char": { - "tokenizer": "lowercase", - "filter": [ "5_char_trunc" ] - } - }, - "filter": { - "5_char_trunc": { - "type": "truncate", - "length": 5 - } - } - } - } -} --------------------------------------------------- \ No newline at end of file diff --git a/docs/reference/analysis/tokenfilters/unique-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/unique-tokenfilter.asciidoc deleted file mode 100644 index f88cad3296282..0000000000000 --- a/docs/reference/analysis/tokenfilters/unique-tokenfilter.asciidoc +++ /dev/null @@ -1,150 +0,0 @@ -[[analysis-unique-tokenfilter]] -=== Unique token filter -++++ -Unique -++++ - -Removes duplicate tokens from a stream. For example, you can use the `unique` -filter to change `the lazy lazy dog` to `the lazy dog`. - -If the `only_on_same_position` parameter is set to `true`, the `unique` filter -removes only duplicate tokens _in the same position_. - -[NOTE] -==== -When `only_on_same_position` is `true`, the `unique` filter works the same as -<> filter. -==== - -[[analysis-unique-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the `unique` filter -to remove duplicate tokens from `the quick fox jumps the lazy fox`: - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer" : "whitespace", - "filter" : ["unique"], - "text" : "the quick fox jumps the lazy fox" -} --------------------------------------------------- - -The filter removes duplicated tokens for `the` and `fox`, producing the -following output: - -[source,text] --------------------------------------------------- -[ the, quick, fox, jumps, lazy ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "the", - "start_offset" : 0, - "end_offset" : 3, - "type" : "word", - "position" : 0 - }, - { - "token" : "quick", - "start_offset" : 4, - "end_offset" : 9, - "type" : "word", - "position" : 1 - }, - { - "token" : "fox", - "start_offset" : 10, - "end_offset" : 13, - "type" : "word", - "position" : 2 - }, - { - "token" : "jumps", - "start_offset" : 14, - "end_offset" : 19, - "type" : "word", - "position" : 3 - }, - { - "token" : "lazy", - "start_offset" : 24, - "end_offset" : 28, - "type" : "word", - "position" : 5 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-unique-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`unique` filter to configure a new <>. 
- -[source,console] --------------------------------------------------- -PUT custom_unique_example -{ - "settings" : { - "analysis" : { - "analyzer" : { - "standard_truncate" : { - "tokenizer" : "standard", - "filter" : ["unique"] - } - } - } - } -} --------------------------------------------------- - -[[analysis-unique-tokenfilter-configure-parms]] -==== Configurable parameters - -`only_on_same_position`:: -(Optional, Boolean) -If `true`, only remove duplicate tokens in the same position. -Defaults to `false`. - -[[analysis-unique-tokenfilter-customize]] -==== Customize - -To customize the `unique` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a custom `unique` filter with -`only_on_same_position` set to `true`. - -[source,console] --------------------------------------------------- -PUT letter_unique_pos_example -{ - "settings": { - "analysis": { - "analyzer": { - "letter_unique_pos": { - "tokenizer": "letter", - "filter": [ "unique_pos" ] - } - }, - "filter": { - "unique_pos": { - "type": "unique", - "only_on_same_position": true - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/uppercase-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/uppercase-tokenfilter.asciidoc deleted file mode 100644 index 9192a46810adb..0000000000000 --- a/docs/reference/analysis/tokenfilters/uppercase-tokenfilter.asciidoc +++ /dev/null @@ -1,106 +0,0 @@ -[[analysis-uppercase-tokenfilter]] -=== Uppercase token filter -++++ -Uppercase -++++ - -Changes token text to uppercase. For example, you can use the `uppercase` filter -to change `the Lazy DoG` to `THE LAZY DOG`. - -This filter uses Lucene's -{lucene-analysis-docs}/core/UpperCaseFilter.html[UpperCaseFilter]. - -[WARNING] -==== -Depending on the language, an uppercase character can map to multiple -lowercase characters. Using the `uppercase` filter could result in the loss of -lowercase character information. - -To avoid this loss but still have a consistent letter case, use the -<> filter instead. 
-==== - -[[analysis-uppercase-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the default -`uppercase` filter to change the `the Quick FoX JUMPs` to uppercase: - -[source,console] --------------------------------------------------- -GET _analyze -{ - "tokenizer" : "standard", - "filter" : ["uppercase"], - "text" : "the Quick FoX JUMPs" -} --------------------------------------------------- - -The filter produces the following tokens: - -[source,text] --------------------------------------------------- -[ THE, QUICK, FOX, JUMPS ] --------------------------------------------------- - -///////////////////// -[source,console-result] --------------------------------------------------- -{ - "tokens" : [ - { - "token" : "THE", - "start_offset" : 0, - "end_offset" : 3, - "type" : "", - "position" : 0 - }, - { - "token" : "QUICK", - "start_offset" : 4, - "end_offset" : 9, - "type" : "", - "position" : 1 - }, - { - "token" : "FOX", - "start_offset" : 10, - "end_offset" : 13, - "type" : "", - "position" : 2 - }, - { - "token" : "JUMPS", - "start_offset" : 14, - "end_offset" : 19, - "type" : "", - "position" : 3 - } - ] -} --------------------------------------------------- -///////////////////// - -[[analysis-uppercase-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`uppercase` filter to configure a new -<>. - -[source,console] --------------------------------------------------- -PUT uppercase_example -{ - "settings": { - "analysis": { - "analyzer": { - "whitespace_uppercase": { - "tokenizer": "whitespace", - "filter": [ "uppercase" ] - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/analysis/tokenfilters/word-delimiter-graph-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/word-delimiter-graph-tokenfilter.asciidoc deleted file mode 100644 index 2999df6a9a1ba..0000000000000 --- a/docs/reference/analysis/tokenfilters/word-delimiter-graph-tokenfilter.asciidoc +++ /dev/null @@ -1,505 +0,0 @@ -[[analysis-word-delimiter-graph-tokenfilter]] -=== Word delimiter graph token filter -++++ -Word delimiter graph -++++ - -Splits tokens at non-alphanumeric characters. The `word_delimiter_graph` filter -also performs optional token normalization based on a set of rules. By default, -the filter uses the following rules: - -* Split tokens at non-alphanumeric characters. - The filter uses these characters as delimiters. - For example: `Super-Duper` -> `Super`, `Duper` -* Remove leading or trailing delimiters from each token. - For example: `XL---42+'Autocoder'` -> `XL`, `42`, `Autocoder` -* Split tokens at letter case transitions. - For example: `PowerShot` -> `Power`, `Shot` -* Split tokens at letter-number transitions. - For example: `XL500` -> `XL`, `500` -* Remove the English possessive (`'s`) from the end of each token. - For example: `Neil's` -> `Neil` - -The `word_delimiter_graph` filter uses Lucene's -{lucene-analysis-docs}/miscellaneous/WordDelimiterGraphFilter.html[WordDelimiterGraphFilter]. - -[TIP] -==== -The `word_delimiter_graph` filter was designed to remove punctuation from -complex identifiers, such as product IDs or part numbers. For these use cases, -we recommend using the `word_delimiter_graph` filter with the -<> tokenizer. - -Avoid using the `word_delimiter_graph` filter to split hyphenated words, such as -`wi-fi`. Because users often search for these words both with and without -hyphens, we recommend using the -<> filter instead. 
-==== - -[[analysis-word-delimiter-graph-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the -`word_delimiter_graph` filter to split `Neil's-Super-Duper-XL500--42+AutoCoder` -into normalized tokens using the filter's default rules: - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "keyword", - "filter": [ "word_delimiter_graph" ], - "text": "Neil's-Super-Duper-XL500--42+AutoCoder" -} ----- - -The filter produces the following tokens: - -[source,txt] ----- -[ Neil, Super, Duper, XL, 500, 42, Auto, Coder ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "Neil", - "start_offset": 0, - "end_offset": 4, - "type": "word", - "position": 0 - }, - { - "token": "Super", - "start_offset": 7, - "end_offset": 12, - "type": "word", - "position": 1 - }, - { - "token": "Duper", - "start_offset": 13, - "end_offset": 18, - "type": "word", - "position": 2 - }, - { - "token": "XL", - "start_offset": 19, - "end_offset": 21, - "type": "word", - "position": 3 - }, - { - "token": "500", - "start_offset": 21, - "end_offset": 24, - "type": "word", - "position": 4 - }, - { - "token": "42", - "start_offset": 26, - "end_offset": 28, - "type": "word", - "position": 5 - }, - { - "token": "Auto", - "start_offset": 29, - "end_offset": 33, - "type": "word", - "position": 6 - }, - { - "token": "Coder", - "start_offset": 33, - "end_offset": 38, - "type": "word", - "position": 7 - } - ] -} ----- -//// - -[[analysis-word-delimiter-graph-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`word_delimiter_graph` filter to configure a new -<>. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "keyword", - "filter": [ "word_delimiter_graph" ] - } - } - } - } -} ----- - -[WARNING] -==== -Avoid using the `word_delimiter_graph` filter with tokenizers that remove -punctuation, such as the <> tokenizer. -This could prevent the `word_delimiter_graph` filter from splitting tokens -correctly. It can also interfere with the filter's configurable parameters, such -as <> or -<>. We -recommend using the <> or -<> tokenizer instead. -==== - -[[word-delimiter-graph-tokenfilter-configure-parms]] -==== Configurable parameters - -[[word-delimiter-graph-tokenfilter-adjust-offsets]] -`adjust_offsets`:: -+ --- -(Optional, Boolean) -If `true`, the filter adjusts the offsets of split or catenated tokens to better -reflect their actual position in the token stream. Defaults to `true`. - -[WARNING] -==== -Set `adjust_offsets` to `false` if your analyzer uses filters, such as the -<> filter, that change the length of tokens -without changing their offsets. Otherwise, the `word_delimiter_graph` filter -could produce tokens with illegal offsets. -==== --- - -[[word-delimiter-graph-tokenfilter-catenate-all]] -`catenate_all`:: -+ --- -(Optional, Boolean) -If `true`, the filter produces catenated tokens for chains of alphanumeric -characters separated by non-alphabetic delimiters. For example: -`super-duper-xl-500` -> [ **`superduperxl500`**, `super`, `duper`, `xl`, `500` ]. -Defaults to `false`. - -[WARNING] -==== -Setting this parameter to `true` produces multi-position tokens, which are not -supported by indexing. - -If this parameter is `true`, avoid using this filter in an index analyzer or -use the <> filter after -this filter to make the token stream suitable for indexing. 
- -When used for search analysis, catenated tokens can cause problems for the -<> query and other queries that -rely on token position for matching. Avoid setting this parameter to `true` if -you plan to use these queries. -==== --- - -[[word-delimiter-graph-tokenfilter-catenate-numbers]] -`catenate_numbers`:: -+ --- -(Optional, Boolean) -If `true`, the filter produces catenated tokens for chains of numeric characters -separated by non-alphabetic delimiters. For example: `01-02-03` -> -[ **`010203`**, `01`, `02`, `03` ]. Defaults to `false`. - -[WARNING] -==== -Setting this parameter to `true` produces multi-position tokens, which are not -supported by indexing. - -If this parameter is `true`, avoid using this filter in an index analyzer or -use the <> filter after -this filter to make the token stream suitable for indexing. - -When used for search analysis, catenated tokens can cause problems for the -<> query and other queries that -rely on token position for matching. Avoid setting this parameter to `true` if -you plan to use these queries. -==== --- - -[[word-delimiter-graph-tokenfilter-catenate-words]] -`catenate_words`:: -+ --- -(Optional, Boolean) -If `true`, the filter produces catenated tokens for chains of alphabetical -characters separated by non-alphabetic delimiters. For example: `super-duper-xl` --> [ **`superduperxl`**, `super`, `duper`, `xl` ]. Defaults to `false`. - -[WARNING] -==== -Setting this parameter to `true` produces multi-position tokens, which are not -supported by indexing. - -If this parameter is `true`, avoid using this filter in an index analyzer or -use the <> filter after -this filter to make the token stream suitable for indexing. - -When used for search analysis, catenated tokens can cause problems for the -<> query and other queries that -rely on token position for matching. Avoid setting this parameter to `true` if -you plan to use these queries. -==== --- - -`generate_number_parts`:: -(Optional, Boolean) -If `true`, the filter includes tokens consisting of only numeric characters in -the output. If `false`, the filter excludes these tokens from the output. -Defaults to `true`. - -`generate_word_parts`:: -(Optional, Boolean) -If `true`, the filter includes tokens consisting of only alphabetical characters -in the output. If `false`, the filter excludes these tokens from the output. -Defaults to `true`. - -`ignore_keywords`:: -(Optional, Boolean) -If `true`, the filter skips tokens with -a `keyword` attribute of `true`. -Defaults to `false`. - -[[word-delimiter-graph-tokenfilter-preserve-original]] -`preserve_original`:: -+ --- -(Optional, Boolean) -If `true`, the filter includes the original version of any split tokens in the -output. This original version includes non-alphanumeric delimiters. For example: -`super-duper-xl-500` -> [ **`super-duper-xl-500`**, `super`, `duper`, `xl`, -`500` ]. Defaults to `false`. - -[WARNING] -==== -Setting this parameter to `true` produces multi-position tokens, which are not -supported by indexing. - -If this parameter is `true`, avoid using this filter in an index analyzer or -use the <> filter after -this filter to make the token stream suitable for indexing. -==== --- - -`protected_words`:: -(Optional, array of strings) -Array of tokens the filter won't split. - -`protected_words_path`:: -+ --- -(Optional, string) -Path to a file that contains a list of tokens the filter won't split. - -This path must be absolute or relative to the `config` location, and the file -must be UTF-8 encoded. 
Each token in the file must be separated by a line -break. --- - -`split_on_case_change`:: -(Optional, Boolean) -If `true`, the filter splits tokens at letter case transitions. For example: -`camelCase` -> [ `camel`, `Case` ]. Defaults to `true`. - -`split_on_numerics`:: -(Optional, Boolean) -If `true`, the filter splits tokens at letter-number transitions. For example: -`j2se` -> [ `j`, `2`, `se` ]. Defaults to `true`. - -`stem_english_possessive`:: -(Optional, Boolean) -If `true`, the filter removes the English possessive (`'s`) from the end of each -token. For example: `O'Neil's` -> [ `O`, `Neil` ]. Defaults to `true`. - -`type_table`:: -+ --- -(Optional, array of strings) -Array of custom type mappings for characters. This allows you to map -non-alphanumeric characters as numeric or alphanumeric to avoid splitting on -those characters. - -For example, the following array maps the plus (`+`) and hyphen (`-`) characters -as alphanumeric, which means they won't be treated as delimiters: - -`[ "+ => ALPHA", "- => ALPHA" ]` - -Supported types include: - -* `ALPHA` (Alphabetical) -* `ALPHANUM` (Alphanumeric) -* `DIGIT` (Numeric) -* `LOWER` (Lowercase alphabetical) -* `SUBWORD_DELIM` (Non-alphanumeric delimiter) -* `UPPER` (Uppercase alphabetical) --- - -`type_table_path`:: -+ --- -(Optional, string) -Path to a file that contains custom type mappings for characters. This allows -you to map non-alphanumeric characters as numeric or alphanumeric to avoid -splitting on those characters. - -For example, the contents of this file may contain the following: - -[source,txt] ----- -# Map the $, %, '.', and ',' characters to DIGIT -# This might be useful for financial data. -$ => DIGIT -% => DIGIT -. => DIGIT -\\u002C => DIGIT - -# in some cases you might not want to split on ZWJ -# this also tests the case where we need a bigger byte[] -# see https://en.wikipedia.org/wiki/Zero-width_joiner -\\u200D => ALPHANUM ----- - -Supported types include: - -* `ALPHA` (Alphabetical) -* `ALPHANUM` (Alphanumeric) -* `DIGIT` (Numeric) -* `LOWER` (Lowercase alphabetical) -* `SUBWORD_DELIM` (Non-alphanumeric delimiter) -* `UPPER` (Uppercase alphabetical) - -This file path must be absolute or relative to the `config` location, and the -file must be UTF-8 encoded. Each mapping in the file must be separated by a line -break. --- - -[[analysis-word-delimiter-graph-tokenfilter-customize]] -==== Customize - -To customize the `word_delimiter_graph` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a `word_delimiter_graph` -filter that uses the following rules: - -* Split tokens at non-alphanumeric characters, _except_ the hyphen (`-`) - character. -* Remove leading or trailing delimiters from each token. -* Do _not_ split tokens at letter case transitions. -* Do _not_ split tokens at letter-number transitions. -* Remove the English possessive (`'s`) from the end of each token. 
- -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "keyword", - "filter": [ "my_custom_word_delimiter_graph_filter" ] - } - }, - "filter": { - "my_custom_word_delimiter_graph_filter": { - "type": "word_delimiter_graph", - "type_table": [ "- => ALPHA" ], - "split_on_case_change": false, - "split_on_numerics": false, - "stem_english_possessive": true - } - } - } - } -} ----- - -[[analysis-word-delimiter-graph-differences]] -==== Differences between `word_delimiter_graph` and `word_delimiter` - -Both the `word_delimiter_graph` and -<> filters produce tokens -that span multiple positions when any of the following parameters are `true`: - - * <> - * <> - * <> - * <> - -However, only the `word_delimiter_graph` filter assigns multi-position tokens a -`positionLength` attribute, which indicates the number of positions a token -spans. This ensures the `word_delimiter_graph` filter always produces valid -<>. - -The `word_delimiter` filter does not assign multi-position tokens a -`positionLength` attribute. This means it produces invalid graphs for streams -including these tokens. - -While indexing does not support token graphs containing multi-position tokens, -queries, such as the <> query, can -use these graphs to generate multiple sub-queries from a single query string. - -To see how token graphs produced by the `word_delimiter` and -`word_delimiter_graph` filters differ, check out the following example. - -.*Example* -[%collapsible] -==== - -[[analysis-word-delimiter-graph-basic-token-graph]] -*Basic token graph* - -Both the `word_delimiter` and `word_delimiter_graph` produce the following token -graph for `PowerShot2000` when the following parameters are `false`: - - * <> - * <> - * <> - * <> - -This graph does not contain multi-position tokens. All tokens span only one -position. - -image::images/analysis/token-graph-basic.svg[align="center"] - -[[analysis-word-delimiter-graph-wdg-token-graph]] -*`word_delimiter_graph` graph with a multi-position token* - -The `word_delimiter_graph` filter produces the following token graph for -`PowerShot2000` when `catenate_words` is `true`. - -This graph correctly indicates the catenated `PowerShot` token spans two -positions. - -image::images/analysis/token-graph-wdg.svg[align="center"] - -[[analysis-word-delimiter-graph-wd-token-graph]] -*`word_delimiter` graph with a multi-position token* - -When `catenate_words` is `true`, the `word_delimiter` filter produces -the following token graph for `PowerShot2000`. - -Note that the catenated `PowerShot` token should span two positions but only -spans one in the token graph, making it invalid. - -image::images/analysis/token-graph-wd.svg[align="center"] - -==== diff --git a/docs/reference/analysis/tokenfilters/word-delimiter-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/word-delimiter-tokenfilter.asciidoc deleted file mode 100644 index c65dade627298..0000000000000 --- a/docs/reference/analysis/tokenfilters/word-delimiter-tokenfilter.asciidoc +++ /dev/null @@ -1,382 +0,0 @@ -[[analysis-word-delimiter-tokenfilter]] -=== Word delimiter token filter -++++ -Word delimiter -++++ - -[WARNING] -==== -We recommend using the -<> instead of -the `word_delimiter` filter. - -The `word_delimiter` filter can produce invalid token graphs. See -<>. - -The `word_delimiter` filter also uses Lucene's -{lucene-analysis-docs}/miscellaneous/WordDelimiterFilter.html[WordDelimiterFilter], -which is marked as deprecated. 
-==== - -Splits tokens at non-alphanumeric characters. The `word_delimiter` filter -also performs optional token normalization based on a set of rules. By default, -the filter uses the following rules: - -* Split tokens at non-alphanumeric characters. - The filter uses these characters as delimiters. - For example: `Super-Duper` -> `Super`, `Duper` -* Remove leading or trailing delimiters from each token. - For example: `XL---42+'Autocoder'` -> `XL`, `42`, `Autocoder` -* Split tokens at letter case transitions. - For example: `PowerShot` -> `Power`, `Shot` -* Split tokens at letter-number transitions. - For example: `XL500` -> `XL`, `500` -* Remove the English possessive (`'s`) from the end of each token. - For example: `Neil's` -> `Neil` - -[TIP] -==== -The `word_delimiter` filter was designed to remove punctuation from complex -identifiers, such as product IDs or part numbers. For these use cases, we -recommend using the `word_delimiter` filter with the -<> tokenizer. - -Avoid using the `word_delimiter` filter to split hyphenated words, such as -`wi-fi`. Because users often search for these words both with and without -hyphens, we recommend using the -<> filter instead. -==== - -[[analysis-word-delimiter-tokenfilter-analyze-ex]] -==== Example - -The following <> request uses the -`word_delimiter` filter to split `Neil's-Super-Duper-XL500--42+AutoCoder` -into normalized tokens using the filter's default rules: - -[source,console] ----- -GET /_analyze -{ - "tokenizer": "keyword", - "filter": [ "word_delimiter" ], - "text": "Neil's-Super-Duper-XL500--42+AutoCoder" -} ----- - -The filter produces the following tokens: - -[source,txt] ----- -[ Neil, Super, Duper, XL, 500, 42, Auto, Coder ] ----- - -//// -[source,console-result] ----- -{ - "tokens": [ - { - "token": "Neil", - "start_offset": 0, - "end_offset": 4, - "type": "word", - "position": 0 - }, - { - "token": "Super", - "start_offset": 7, - "end_offset": 12, - "type": "word", - "position": 1 - }, - { - "token": "Duper", - "start_offset": 13, - "end_offset": 18, - "type": "word", - "position": 2 - }, - { - "token": "XL", - "start_offset": 19, - "end_offset": 21, - "type": "word", - "position": 3 - }, - { - "token": "500", - "start_offset": 21, - "end_offset": 24, - "type": "word", - "position": 4 - }, - { - "token": "42", - "start_offset": 26, - "end_offset": 28, - "type": "word", - "position": 5 - }, - { - "token": "Auto", - "start_offset": 29, - "end_offset": 33, - "type": "word", - "position": 6 - }, - { - "token": "Coder", - "start_offset": 33, - "end_offset": 38, - "type": "word", - "position": 7 - } - ] -} ----- -//// - -[analysis-word-delimiter-tokenfilter-analyzer-ex]] -==== Add to an analyzer - -The following <> request uses the -`word_delimiter` filter to configure a new -<>. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "keyword", - "filter": [ "word_delimiter" ] - } - } - } - } -} ----- - -[WARNING] -==== -Avoid using the `word_delimiter` filter with tokenizers that remove punctuation, -such as the <> tokenizer. This could -prevent the `word_delimiter` filter from splitting tokens correctly. It can also -interfere with the filter's configurable parameters, such as `catenate_all` or -`preserve_original`. We recommend using the -<> or -<> tokenizer instead. 
-==== - -[[word-delimiter-tokenfilter-configure-parms]] -==== Configurable parameters - -`catenate_all`:: -+ --- -(Optional, Boolean) -If `true`, the filter produces catenated tokens for chains of alphanumeric -characters separated by non-alphabetic delimiters. For example: -`super-duper-xl-500` -> [ `super`, **`superduperxl500`**, `duper`, `xl`, `500` -]. Defaults to `false`. - -[WARNING] -==== -When used for search analysis, catenated tokens can cause problems for the -<> query and other queries that -rely on token position for matching. Avoid setting this parameter to `true` if -you plan to use these queries. -==== --- - -`catenate_numbers`:: -+ --- -(Optional, Boolean) -If `true`, the filter produces catenated tokens for chains of numeric characters -separated by non-alphabetic delimiters. For example: `01-02-03` -> -[ `01`, **`010203`**, `02`, `03` ]. Defaults to `false`. - -[WARNING] -==== -When used for search analysis, catenated tokens can cause problems for the -<> query and other queries that -rely on token position for matching. Avoid setting this parameter to `true` if -you plan to use these queries. -==== --- - -`catenate_words`:: -+ --- -(Optional, Boolean) -If `true`, the filter produces catenated tokens for chains of alphabetical -characters separated by non-alphabetic delimiters. For example: `super-duper-xl` --> [ `super`, **`superduperxl`**, `duper`, `xl` ]. Defaults to `false`. - -[WARNING] -==== -When used for search analysis, catenated tokens can cause problems for the -<> query and other queries that -rely on token position for matching. Avoid setting this parameter to `true` if -you plan to use these queries. -==== --- - -`generate_number_parts`:: -(Optional, Boolean) -If `true`, the filter includes tokens consisting of only numeric characters in -the output. If `false`, the filter excludes these tokens from the output. -Defaults to `true`. - -`generate_word_parts`:: -(Optional, Boolean) -If `true`, the filter includes tokens consisting of only alphabetical characters -in the output. If `false`, the filter excludes these tokens from the output. -Defaults to `true`. - -`preserve_original`:: -(Optional, Boolean) -If `true`, the filter includes the original version of any split tokens in the -output. This original version includes non-alphanumeric delimiters. For example: -`super-duper-xl-500` -> [ **`super-duper-xl-500`**, `super`, `duper`, `xl`, -`500` ]. Defaults to `false`. - -`protected_words`:: -(Optional, array of strings) -Array of tokens the filter won't split. - -`protected_words_path`:: -+ --- -(Optional, string) -Path to a file that contains a list of tokens the filter won't split. - -This path must be absolute or relative to the `config` location, and the file -must be UTF-8 encoded. Each token in the file must be separated by a line -break. --- - -`split_on_case_change`:: -(Optional, Boolean) -If `true`, the filter splits tokens at letter case transitions. For example: -`camelCase` -> [ `camel`, `Case` ]. Defaults to `true`. - -`split_on_numerics`:: -(Optional, Boolean) -If `true`, the filter splits tokens at letter-number transitions. For example: -`j2se` -> [ `j`, `2`, `se` ]. Defaults to `true`. - -`stem_english_possessive`:: -(Optional, Boolean) -If `true`, the filter removes the English possessive (`'s`) from the end of each -token. For example: `O'Neil's` -> [ `O`, `Neil` ]. Defaults to `true`. - -`type_table`:: -+ --- -(Optional, array of strings) -Array of custom type mappings for characters. 
This allows you to map -non-alphanumeric characters as numeric or alphanumeric to avoid splitting on -those characters. - -For example, the following array maps the plus (`+`) and hyphen (`-`) characters -as alphanumeric, which means they won't be treated as delimiters: - -`[ "+ => ALPHA", "- => ALPHA" ]` - -Supported types include: - -* `ALPHA` (Alphabetical) -* `ALPHANUM` (Alphanumeric) -* `DIGIT` (Numeric) -* `LOWER` (Lowercase alphabetical) -* `SUBWORD_DELIM` (Non-alphanumeric delimiter) -* `UPPER` (Uppercase alphabetical) --- - -`type_table_path`:: -+ --- -(Optional, string) -Path to a file that contains custom type mappings for characters. This allows -you to map non-alphanumeric characters as numeric or alphanumeric to avoid -splitting on those characters. - -For example, the contents of this file may contain the following: - -[source,txt] ----- -# Map the $, %, '.', and ',' characters to DIGIT -# This might be useful for financial data. -$ => DIGIT -% => DIGIT -. => DIGIT -\\u002C => DIGIT - -# in some cases you might not want to split on ZWJ -# this also tests the case where we need a bigger byte[] -# see https://en.wikipedia.org/wiki/Zero-width_joiner -\\u200D => ALPHANUM ----- - -Supported types include: - -* `ALPHA` (Alphabetical) -* `ALPHANUM` (Alphanumeric) -* `DIGIT` (Numeric) -* `LOWER` (Lowercase alphabetical) -* `SUBWORD_DELIM` (Non-alphanumeric delimiter) -* `UPPER` (Uppercase alphabetical) - -This file path must be absolute or relative to the `config` location, and the -file must be UTF-8 encoded. Each mapping in the file must be separated by a line -break. --- - -[[analysis-word-delimiter-tokenfilter-customize]] -==== Customize - -To customize the `word_delimiter` filter, duplicate it to create the basis -for a new custom token filter. You can modify the filter using its configurable -parameters. - -For example, the following request creates a `word_delimiter` -filter that uses the following rules: - -* Split tokens at non-alphanumeric characters, _except_ the hyphen (`-`) - character. -* Remove leading or trailing delimiters from each token. -* Do _not_ split tokens at letter case transitions. -* Do _not_ split tokens at letter-number transitions. -* Remove the English possessive (`'s`) from the end of each token. - -[source,console] ----- -PUT /my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "keyword", - "filter": [ "my_custom_word_delimiter_filter" ] - } - }, - "filter": { - "my_custom_word_delimiter_filter": { - "type": "word_delimiter", - "type_table": [ "- => ALPHA" ], - "split_on_case_change": false, - "split_on_numerics": false, - "stem_english_possessive": true - } - } - } - } -} ----- diff --git a/docs/reference/analysis/tokenizers.asciidoc b/docs/reference/analysis/tokenizers.asciidoc deleted file mode 100644 index 89928f07b5638..0000000000000 --- a/docs/reference/analysis/tokenizers.asciidoc +++ /dev/null @@ -1,163 +0,0 @@ -[[analysis-tokenizers]] -== Tokenizer reference - -.Difference between {es} tokenization and neural tokenization -[NOTE] -==== -{es}'s tokenization process produces linguistic tokens, optimized for search and retrieval. -This differs from neural tokenization in the context of machine learning and natural language processing. Neural tokenizers translate strings into smaller, subword tokens, which are encoded into vectors for consumptions by neural networks. -{es} does not have built-in neural tokenizers. 
-==== - -A _tokenizer_ receives a stream of characters, breaks it up into individual -_tokens_ (usually individual words), and outputs a stream of _tokens_. For -instance, a <> tokenizer breaks -text into tokens whenever it sees any whitespace. It would convert the text -`"Quick brown fox!"` into the terms `[Quick, brown, fox!]`. - -The tokenizer is also responsible for recording the following: - -* Order or _position_ of each term (used for phrase and word proximity queries) -* Start and end _character offsets_ of the original word which the term -represents (used for highlighting search snippets). -* _Token type_, a classification of each term produced, such as ``, -``, or ``. Simpler analyzers only produce the `word` token type. - -Elasticsearch has a number of built in tokenizers which can be used to build -<>. - -[discrete] -=== Word Oriented Tokenizers - -The following tokenizers are usually used for tokenizing full text into -individual words: - -<>:: - -The `standard` tokenizer divides text into terms on word boundaries, as -defined by the Unicode Text Segmentation algorithm. It removes most -punctuation symbols. It is the best choice for most languages. - -<>:: - -The `letter` tokenizer divides text into terms whenever it encounters a -character which is not a letter. - -<>:: - -The `lowercase` tokenizer, like the `letter` tokenizer, divides text into -terms whenever it encounters a character which is not a letter, but it also -lowercases all terms. - -<>:: - -The `whitespace` tokenizer divides text into terms whenever it encounters any -whitespace character. - -<>:: - -The `uax_url_email` tokenizer is like the `standard` tokenizer except that it -recognises URLs and email addresses as single tokens. - -<>:: - -The `classic` tokenizer is a grammar based tokenizer for the English Language. - -<>:: - -The `thai` tokenizer segments Thai text into words. - -[discrete] -=== Partial Word Tokenizers - -These tokenizers break up text or words into small fragments, for partial word -matching: - -<>:: - -The `ngram` tokenizer can break up text into words when it encounters any of -a list of specified characters (e.g. whitespace or punctuation), then it returns -n-grams of each word: a sliding window of continuous letters, e.g. `quick` -> -`[qu, ui, ic, ck]`. - -<>:: - -The `edge_ngram` tokenizer can break up text into words when it encounters any of -a list of specified characters (e.g. whitespace or punctuation), then it returns -n-grams of each word which are anchored to the start of the word, e.g. `quick` -> -`[q, qu, qui, quic, quick]`. - - -[discrete] -=== Structured Text Tokenizers - -The following tokenizers are usually used with structured text like -identifiers, email addresses, zip codes, and paths, rather than with full -text: - -<>:: - -The `keyword` tokenizer is a ``noop'' tokenizer that accepts whatever text it -is given and outputs the exact same text as a single term. It can be combined -with token filters like <> to -normalise the analysed terms. - -<>:: - -The `pattern` tokenizer uses a regular expression to either split text into -terms whenever it matches a word separator, or to capture matching text as -terms. - -<>:: - -The `simple_pattern` tokenizer uses a regular expression to capture matching -text as terms. It uses a restricted subset of regular expression features -and is generally faster than the `pattern` tokenizer. 
- -<>:: - -The `char_group` tokenizer is configurable through sets of characters to split -on, which is usually less expensive than running regular expressions. - -<>:: - -The `simple_pattern_split` tokenizer uses the same restricted regular expression -subset as the `simple_pattern` tokenizer, but splits the input at matches rather -than returning the matches as terms. - -<>:: - -The `path_hierarchy` tokenizer takes a hierarchical value like a filesystem -path, splits on the path separator, and emits a term for each component in the -tree, e.g. `/foo/bar/baz` -> `[/foo, /foo/bar, /foo/bar/baz ]`. - - -include::tokenizers/chargroup-tokenizer.asciidoc[] - -include::tokenizers/classic-tokenizer.asciidoc[] - -include::tokenizers/edgengram-tokenizer.asciidoc[] - -include::tokenizers/keyword-tokenizer.asciidoc[] - -include::tokenizers/letter-tokenizer.asciidoc[] - -include::tokenizers/lowercase-tokenizer.asciidoc[] - -include::tokenizers/ngram-tokenizer.asciidoc[] - -include::tokenizers/pathhierarchy-tokenizer.asciidoc[] - -include::tokenizers/pattern-tokenizer.asciidoc[] - -include::tokenizers/simplepattern-tokenizer.asciidoc[] - -include::tokenizers/simplepatternsplit-tokenizer.asciidoc[] - -include::tokenizers/standard-tokenizer.asciidoc[] - -include::tokenizers/thai-tokenizer.asciidoc[] - -include::tokenizers/uaxurlemail-tokenizer.asciidoc[] - -include::tokenizers/whitespace-tokenizer.asciidoc[] diff --git a/docs/reference/analysis/tokenizers/chargroup-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/chargroup-tokenizer.asciidoc deleted file mode 100644 index 84a29dc5718e9..0000000000000 --- a/docs/reference/analysis/tokenizers/chargroup-tokenizer.asciidoc +++ /dev/null @@ -1,84 +0,0 @@ -[[analysis-chargroup-tokenizer]] -=== Character group tokenizer -++++ -Character group -++++ - -The `char_group` tokenizer breaks text into terms whenever it encounters a -character which is in a defined set. It is mostly useful for cases where a simple -custom tokenization is desired, and the overhead of use of the <> -is not acceptable. - -[discrete] -=== Configuration - -The `char_group` tokenizer accepts one parameter: - -[horizontal] -`tokenize_on_chars`:: - A list containing a list of characters to tokenize the string on. Whenever a character - from this list is encountered, a new token is started. This accepts either single - characters like e.g. `-`, or character groups: `whitespace`, `letter`, `digit`, - `punctuation`, `symbol`. - -`max_token_length`:: - The maximum token length. If a token is seen that exceeds this length then - it is split at `max_token_length` intervals. Defaults to `255`. 
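For illustration only, the two parameters can be combined in an inline `_analyze` request; the sample text and the `max_token_length` value of `5` below are arbitrary choices rather than defaults:

[source,console]
----
POST _analyze
{
  "tokenizer": {
    "type": "char_group",
    "tokenize_on_chars": [ "whitespace", "-" ],
    "max_token_length": 5
  },
  "text": "search-as-you-type queries"
}
----

With these settings, any token longer than five characters, such as `search` or `queries`, should be split into five-character chunks (`searc`, `h`, `queri`, `es`), while the whitespace and hyphen characters act as delimiters and are discarded.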
- - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": { - "type": "char_group", - "tokenize_on_chars": [ - "whitespace", - "-", - "\n" - ] - }, - "text": "The QUICK brown-fox" -} ---------------------------- - -returns - -[source,console-result] ---------------------------- -{ - "tokens": [ - { - "token": "The", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "QUICK", - "start_offset": 4, - "end_offset": 9, - "type": "word", - "position": 1 - }, - { - "token": "brown", - "start_offset": 10, - "end_offset": 15, - "type": "word", - "position": 2 - }, - { - "token": "fox", - "start_offset": 16, - "end_offset": 19, - "type": "word", - "position": 3 - } - ] -} ---------------------------- diff --git a/docs/reference/analysis/tokenizers/classic-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/classic-tokenizer.asciidoc deleted file mode 100644 index f617fddb1bcaa..0000000000000 --- a/docs/reference/analysis/tokenizers/classic-tokenizer.asciidoc +++ /dev/null @@ -1,263 +0,0 @@ -[[analysis-classic-tokenizer]] -=== Classic tokenizer -++++ -Classic -++++ - -The `classic` tokenizer is a grammar based tokenizer that is good for English -language documents. This tokenizer has heuristics for special treatment of -acronyms, company names, email addresses, and internet host names. However, -these rules don't always work, and the tokenizer doesn't work well for most -languages other than English: - -* It splits words at most punctuation characters, removing punctuation. However, a - dot that's not followed by whitespace is considered part of a token. - -* It splits words at hyphens, unless there's a number in the token, in which case - the whole token is interpreted as a product number and is not split. - -* It recognizes email addresses and internet hostnames as one token. - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "classic", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." 
-} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "The", - "start_offset": 0, - "end_offset": 3, - "type": "", - "position": 0 - }, - { - "token": "2", - "start_offset": 4, - "end_offset": 5, - "type": "", - "position": 1 - }, - { - "token": "QUICK", - "start_offset": 6, - "end_offset": 11, - "type": "", - "position": 2 - }, - { - "token": "Brown", - "start_offset": 12, - "end_offset": 17, - "type": "", - "position": 3 - }, - { - "token": "Foxes", - "start_offset": 18, - "end_offset": 23, - "type": "", - "position": 4 - }, - { - "token": "jumped", - "start_offset": 24, - "end_offset": 30, - "type": "", - "position": 5 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "", - "position": 6 - }, - { - "token": "the", - "start_offset": 36, - "end_offset": 39, - "type": "", - "position": 7 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "", - "position": 8 - }, - { - "token": "dog's", - "start_offset": 45, - "end_offset": 50, - "type": "", - "position": 9 - }, - { - "token": "bone", - "start_offset": 51, - "end_offset": 55, - "type": "", - "position": 10 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ The, 2, QUICK, Brown, Foxes, jumped, over, the, lazy, dog's, bone ] ---------------------------- - -[discrete] -=== Configuration - -The `classic` tokenizer accepts the following parameters: - -[horizontal] -`max_token_length`:: - - The maximum token length. If a token is seen that exceeds this length then - it is split at `max_token_length` intervals. Defaults to `255`. - -[discrete] -=== Example configuration - -In this example, we configure the `classic` tokenizer to have a -`max_token_length` of 5 (for demonstration purposes): - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "my_tokenizer" - } - }, - "tokenizer": { - "my_tokenizer": { - "type": "classic", - "max_token_length": 5 - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_analyzer", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." 
-} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "The", - "start_offset": 0, - "end_offset": 3, - "type": "", - "position": 0 - }, - { - "token": "2", - "start_offset": 4, - "end_offset": 5, - "type": "", - "position": 1 - }, - { - "token": "QUICK", - "start_offset": 6, - "end_offset": 11, - "type": "", - "position": 2 - }, - { - "token": "Brown", - "start_offset": 12, - "end_offset": 17, - "type": "", - "position": 3 - }, - { - "token": "Foxes", - "start_offset": 18, - "end_offset": 23, - "type": "", - "position": 4 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "", - "position": 6 - }, - { - "token": "the", - "start_offset": 36, - "end_offset": 39, - "type": "", - "position": 7 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "", - "position": 8 - }, - { - "token": "dog's", - "start_offset": 45, - "end_offset": 50, - "type": "", - "position": 9 - }, - { - "token": "bone", - "start_offset": 51, - "end_offset": 55, - "type": "", - "position": 10 - } - ] -} ----------------------------- - -///////////////////// - - -The above example produces the following terms: - -[source,text] ---------------------------- -[ The, 2, QUICK, Brown, Foxes, jumpe, d, over, the, lazy, dog's, bone ] ---------------------------- diff --git a/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc deleted file mode 100644 index 7bd6bb6f0e85a..0000000000000 --- a/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc +++ /dev/null @@ -1,359 +0,0 @@ -[[analysis-edgengram-tokenizer]] -=== Edge n-gram tokenizer -++++ -Edge n-gram -++++ - -The `edge_ngram` tokenizer first breaks text down into words whenever it -encounters one of a list of specified characters, then it emits -{wikipedia}/N-gram[N-grams] of each word where the start of -the N-gram is anchored to the beginning of the word. - -Edge N-Grams are useful for _search-as-you-type_ queries. - -TIP: When you need _search-as-you-type_ for text which has a widely known -order, such as movie or song titles, the -<> is a much more efficient -choice than edge N-grams. Edge N-grams have the advantage when trying to -autocomplete words that can appear in any order. - -[discrete] -=== Example output - -With the default settings, the `edge_ngram` tokenizer treats the initial text as a -single token and produces N-grams with minimum length `1` and maximum length -`2`: - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "edge_ngram", - "text": "Quick Fox" -} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "Q", - "start_offset": 0, - "end_offset": 1, - "type": "word", - "position": 0 - }, - { - "token": "Qu", - "start_offset": 0, - "end_offset": 2, - "type": "word", - "position": 1 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ Q, Qu ] ---------------------------- - -NOTE: These default gram lengths are almost entirely useless. You need to -configure the `edge_ngram` before using it. - -[discrete] -=== Configuration - -The `edge_ngram` tokenizer accepts the following parameters: - -`min_gram`:: - Minimum length of characters in a gram. Defaults to `1`. 
- -`max_gram`:: -+ --- -Maximum length of characters in a gram. Defaults to `2`. - -See <>. --- - -`token_chars`:: - - Character classes that should be included in a token. Elasticsearch - will split on characters that don't belong to the classes specified. - Defaults to `[]` (keep all characters). -+ -Character classes may be any of the following: -+ -* `letter` -- for example `a`, `b`, `ï` or `京` -* `digit` -- for example `3` or `7` -* `whitespace` -- for example `" "` or `"\n"` -* `punctuation` -- for example `!` or `"` -* `symbol` -- for example `$` or `√` -* `custom` -- custom characters which need to be set using the -`custom_token_chars` setting. - -`custom_token_chars`:: - - Custom characters that should be treated as part of a token. For example, - setting this to `+-_` will make the tokenizer treat the plus, minus and - underscore sign as part of a token. - -[discrete] -[[max-gram-limits]] -=== Limitations of the `max_gram` parameter - -The `edge_ngram` tokenizer's `max_gram` value limits the character length of -tokens. When the `edge_ngram` tokenizer is used with an index analyzer, this -means search terms longer than the `max_gram` length may not match any indexed -terms. - -For example, if the `max_gram` is `3`, searches for `apple` won't match the -indexed term `app`. - -To account for this, you can use the -<> token filter with a search analyzer -to shorten search terms to the `max_gram` character length. However, this could -return irrelevant results. - -For example, if the `max_gram` is `3` and search terms are truncated to three -characters, the search term `apple` is shortened to `app`. This means searches -for `apple` return any indexed terms matching `app`, such as `apply`, `approximate` -and `apple`. - -We recommend testing both approaches to see which best fits your -use case and desired search experience. - -[discrete] -=== Example configuration - -In this example, we configure the `edge_ngram` tokenizer to treat letters and -digits as tokens, and to produce grams with minimum length `2` and maximum -length `10`: - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "my_tokenizer" - } - }, - "tokenizer": { - "my_tokenizer": { - "type": "edge_ngram", - "min_gram": 2, - "max_gram": 10, - "token_chars": [ - "letter", - "digit" - ] - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_analyzer", - "text": "2 Quick Foxes." 
-} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "Qu", - "start_offset": 2, - "end_offset": 4, - "type": "word", - "position": 0 - }, - { - "token": "Qui", - "start_offset": 2, - "end_offset": 5, - "type": "word", - "position": 1 - }, - { - "token": "Quic", - "start_offset": 2, - "end_offset": 6, - "type": "word", - "position": 2 - }, - { - "token": "Quick", - "start_offset": 2, - "end_offset": 7, - "type": "word", - "position": 3 - }, - { - "token": "Fo", - "start_offset": 8, - "end_offset": 10, - "type": "word", - "position": 4 - }, - { - "token": "Fox", - "start_offset": 8, - "end_offset": 11, - "type": "word", - "position": 5 - }, - { - "token": "Foxe", - "start_offset": 8, - "end_offset": 12, - "type": "word", - "position": 6 - }, - { - "token": "Foxes", - "start_offset": 8, - "end_offset": 13, - "type": "word", - "position": 7 - } - ] -} ----------------------------- - -///////////////////// - -The above example produces the following terms: - -[source,text] ---------------------------- -[ Qu, Qui, Quic, Quick, Fo, Fox, Foxe, Foxes ] ---------------------------- - -Usually we recommend using the same `analyzer` at index time and at search -time. In the case of the `edge_ngram` tokenizer, the advice is different. It -only makes sense to use the `edge_ngram` tokenizer at index time, to ensure -that partial words are available for matching in the index. At search time, -just search for the terms the user has typed in, for instance: `Quick Fo`. - -Below is an example of how to set up a field for _search-as-you-type_. - -Note that the `max_gram` value for the index analyzer is `10`, which limits -indexed terms to 10 characters. Search terms are not truncated, meaning that -search terms longer than 10 characters may not match any indexed terms. - -[source,console] ------------------------------------ -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "autocomplete": { - "tokenizer": "autocomplete", - "filter": [ - "lowercase" - ] - }, - "autocomplete_search": { - "tokenizer": "lowercase" - } - }, - "tokenizer": { - "autocomplete": { - "type": "edge_ngram", - "min_gram": 2, - "max_gram": 10, - "token_chars": [ - "letter" - ] - } - } - } - }, - "mappings": { - "properties": { - "title": { - "type": "text", - "analyzer": "autocomplete", - "search_analyzer": "autocomplete_search" - } - } - } -} - -PUT my-index-000001/_doc/1 -{ - "title": "Quick Foxes" <1> -} - -POST my-index-000001/_refresh - -GET my-index-000001/_search -{ - "query": { - "match": { - "title": { - "query": "Quick Fo", <2> - "operator": "and" - } - } - } -} ------------------------------------ - -<1> The `autocomplete` analyzer indexes the terms `[qu, qui, quic, quick, fo, fox, foxe, foxes]`. -<2> The `autocomplete_search` analyzer searches for the terms `[quick, fo]`, both of which appear in the index. 
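The limitations section above also mentions shortening search terms with the `truncate` token filter instead of leaving them untruncated. The following sketch shows one way that could look for the same mapping; the index name and the `truncate_to_gram_size` filter name are illustrative, not part of the original example:

[source,console]
----
PUT my-index-000002
{
  "settings": {
    "analysis": {
      "filter": {
        "truncate_to_gram_size": {
          "type": "truncate",
          "length": 10 <1>
        }
      },
      "analyzer": {
        "autocomplete": {
          "tokenizer": "autocomplete",
          "filter": [ "lowercase" ]
        },
        "autocomplete_search": {
          "tokenizer": "lowercase",
          "filter": [ "truncate_to_gram_size" ] <2>
        }
      },
      "tokenizer": {
        "autocomplete": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 10,
          "token_chars": [ "letter" ]
        }
      }
    }
  }
}
----
<1> Matches the `max_gram` of the index-time tokenizer.
<2> Search terms are lowercased and then truncated to 10 characters before matching.

As noted earlier, truncation can surface irrelevant matches, so test both variants against your own data before choosing one.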
- -///////////////////// - -[source,console-result] ----------------------------- -{ - "took": $body.took, - "timed_out": false, - "_shards": { - "total": 1, - "successful": 1, - "skipped" : 0, - "failed": 0 - }, - "hits": { - "total" : { - "value": 1, - "relation": "eq" - }, - "max_score": 0.5753642, - "hits": [ - { - "_index": "my-index-000001", - "_id": "1", - "_score": 0.5753642, - "_source": { - "title": "Quick Foxes" - } - } - ] - } -} ----------------------------- -// TESTRESPONSE[s/"took".*/"took": "$body.took",/] -///////////////////// diff --git a/docs/reference/analysis/tokenizers/keyword-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/keyword-tokenizer.asciidoc deleted file mode 100644 index 53782f1907baf..0000000000000 --- a/docs/reference/analysis/tokenizers/keyword-tokenizer.asciidoc +++ /dev/null @@ -1,109 +0,0 @@ -[[analysis-keyword-tokenizer]] -=== Keyword tokenizer -++++ -Keyword -++++ - -The `keyword` tokenizer is a ``noop'' tokenizer that accepts whatever text it -is given and outputs the exact same text as a single term. It can be combined -with token filters to normalise output, e.g. lower-casing email addresses. - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "keyword", - "text": "New York" -} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "New York", - "start_offset": 0, - "end_offset": 8, - "type": "word", - "position": 0 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following term: - -[source,text] ---------------------------- -[ New York ] ---------------------------- - -[discrete] -[[analysis-keyword-tokenizer-token-filters]] -=== Combine with token filters -You can combine the `keyword` tokenizer with token filters to normalise -structured data, such as product IDs or email addresses. - -For example, the following <> request uses the -`keyword` tokenizer and <> filter to -convert an email address to lowercase. - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "keyword", - "filter": [ "lowercase" ], - "text": "john.SMITH@example.COM" -} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "john.smith@example.com", - "start_offset": 0, - "end_offset": 22, - "type": "word", - "position": 0 - } - ] -} ----------------------------- - -///////////////////// - - -The request produces the following token: - -[source,text] ---------------------------- -[ john.smith@example.com ] ---------------------------- - - -[discrete] -=== Configuration - -The `keyword` tokenizer accepts the following parameters: - -[horizontal] -`buffer_size`:: - - The number of characters read into the term buffer in a single pass. - Defaults to `256`. The term buffer will grow by this size until all the - text has been consumed. It is advisable not to change this setting. - diff --git a/docs/reference/analysis/tokenizers/letter-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/letter-tokenizer.asciidoc deleted file mode 100644 index c5b809fac1c2b..0000000000000 --- a/docs/reference/analysis/tokenizers/letter-tokenizer.asciidoc +++ /dev/null @@ -1,124 +0,0 @@ -[[analysis-letter-tokenizer]] -=== Letter tokenizer -++++ -Letter -++++ - -The `letter` tokenizer breaks text into terms whenever it encounters a -character which is not a letter. 
It does a reasonable job for most European -languages, but does a terrible job for some Asian languages, where words are -not separated by spaces. - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "letter", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." -} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "The", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "QUICK", - "start_offset": 6, - "end_offset": 11, - "type": "word", - "position": 1 - }, - { - "token": "Brown", - "start_offset": 12, - "end_offset": 17, - "type": "word", - "position": 2 - }, - { - "token": "Foxes", - "start_offset": 18, - "end_offset": 23, - "type": "word", - "position": 3 - }, - { - "token": "jumped", - "start_offset": 24, - "end_offset": 30, - "type": "word", - "position": 4 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "word", - "position": 5 - }, - { - "token": "the", - "start_offset": 36, - "end_offset": 39, - "type": "word", - "position": 6 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "word", - "position": 7 - }, - { - "token": "dog", - "start_offset": 45, - "end_offset": 48, - "type": "word", - "position": 8 - }, - { - "token": "s", - "start_offset": 49, - "end_offset": 50, - "type": "word", - "position": 9 - }, - { - "token": "bone", - "start_offset": 51, - "end_offset": 55, - "type": "word", - "position": 10 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ The, QUICK, Brown, Foxes, jumped, over, the, lazy, dog, s, bone ] ---------------------------- - -[discrete] -=== Configuration - -The `letter` tokenizer is not configurable. diff --git a/docs/reference/analysis/tokenizers/lowercase-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/lowercase-tokenizer.asciidoc deleted file mode 100644 index 5a38313fb5d94..0000000000000 --- a/docs/reference/analysis/tokenizers/lowercase-tokenizer.asciidoc +++ /dev/null @@ -1,128 +0,0 @@ -[[analysis-lowercase-tokenizer]] -=== Lowercase tokenizer -++++ -Lowercase -++++ - -The `lowercase` tokenizer, like the -<> breaks text into terms -whenever it encounters a character which is not a letter, but it also -lowercases all terms. It is functionally equivalent to the -<> combined with the -<>, but is more -efficient as it performs both steps in a single pass. - - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "lowercase", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." 
-} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "the", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "quick", - "start_offset": 6, - "end_offset": 11, - "type": "word", - "position": 1 - }, - { - "token": "brown", - "start_offset": 12, - "end_offset": 17, - "type": "word", - "position": 2 - }, - { - "token": "foxes", - "start_offset": 18, - "end_offset": 23, - "type": "word", - "position": 3 - }, - { - "token": "jumped", - "start_offset": 24, - "end_offset": 30, - "type": "word", - "position": 4 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "word", - "position": 5 - }, - { - "token": "the", - "start_offset": 36, - "end_offset": 39, - "type": "word", - "position": 6 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "word", - "position": 7 - }, - { - "token": "dog", - "start_offset": 45, - "end_offset": 48, - "type": "word", - "position": 8 - }, - { - "token": "s", - "start_offset": 49, - "end_offset": 50, - "type": "word", - "position": 9 - }, - { - "token": "bone", - "start_offset": 51, - "end_offset": 55, - "type": "word", - "position": 10 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ the, quick, brown, foxes, jumped, over, the, lazy, dog, s, bone ] ---------------------------- - -[discrete] -=== Configuration - -The `lowercase` tokenizer is not configurable. diff --git a/docs/reference/analysis/tokenizers/ngram-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/ngram-tokenizer.asciidoc deleted file mode 100644 index 0c244734a4839..0000000000000 --- a/docs/reference/analysis/tokenizers/ngram-tokenizer.asciidoc +++ /dev/null @@ -1,312 +0,0 @@ -[[analysis-ngram-tokenizer]] -=== N-gram tokenizer -++++ -N-gram -++++ - -The `ngram` tokenizer first breaks text down into words whenever it encounters -one of a list of specified characters, then it emits -{wikipedia}/N-gram[N-grams] of each word of the specified -length. - -N-grams are like a sliding window that moves across the word - a continuous -sequence of characters of the specified length. They are useful for querying -languages that don't use spaces or that have long compound words, like German. 
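To make the compound-word point concrete, here is a small sketch; the trigram settings and the German sample word are arbitrary illustrations, not defaults:

[source,console]
----
POST _analyze
{
  "tokenizer": {
    "type": "ngram",
    "min_gram": 3,
    "max_gram": 3,
    "token_chars": [ "letter" ]
  },
  "text": "Fußballweltmeisterschaft"
}
----

Every three-character window of the compound should become its own term (`Fuß`, `ußb`, `ßba`, and so on), which is what allows partial words such as `ball` or `welt` to match inside the longer compound.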
- -[discrete] -=== Example output - -With the default settings, the `ngram` tokenizer treats the initial text as a -single token and produces N-grams with minimum length `1` and maximum length -`2`: - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "ngram", - "text": "Quick Fox" -} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "Q", - "start_offset": 0, - "end_offset": 1, - "type": "word", - "position": 0 - }, - { - "token": "Qu", - "start_offset": 0, - "end_offset": 2, - "type": "word", - "position": 1 - }, - { - "token": "u", - "start_offset": 1, - "end_offset": 2, - "type": "word", - "position": 2 - }, - { - "token": "ui", - "start_offset": 1, - "end_offset": 3, - "type": "word", - "position": 3 - }, - { - "token": "i", - "start_offset": 2, - "end_offset": 3, - "type": "word", - "position": 4 - }, - { - "token": "ic", - "start_offset": 2, - "end_offset": 4, - "type": "word", - "position": 5 - }, - { - "token": "c", - "start_offset": 3, - "end_offset": 4, - "type": "word", - "position": 6 - }, - { - "token": "ck", - "start_offset": 3, - "end_offset": 5, - "type": "word", - "position": 7 - }, - { - "token": "k", - "start_offset": 4, - "end_offset": 5, - "type": "word", - "position": 8 - }, - { - "token": "k ", - "start_offset": 4, - "end_offset": 6, - "type": "word", - "position": 9 - }, - { - "token": " ", - "start_offset": 5, - "end_offset": 6, - "type": "word", - "position": 10 - }, - { - "token": " F", - "start_offset": 5, - "end_offset": 7, - "type": "word", - "position": 11 - }, - { - "token": "F", - "start_offset": 6, - "end_offset": 7, - "type": "word", - "position": 12 - }, - { - "token": "Fo", - "start_offset": 6, - "end_offset": 8, - "type": "word", - "position": 13 - }, - { - "token": "o", - "start_offset": 7, - "end_offset": 8, - "type": "word", - "position": 14 - }, - { - "token": "ox", - "start_offset": 7, - "end_offset": 9, - "type": "word", - "position": 15 - }, - { - "token": "x", - "start_offset": 8, - "end_offset": 9, - "type": "word", - "position": 16 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ Q, Qu, u, ui, i, ic, c, ck, k, "k ", " ", " F", F, Fo, o, ox, x ] ---------------------------- - -[discrete] -=== Configuration - -The `ngram` tokenizer accepts the following parameters: - -[horizontal] -`min_gram`:: - Minimum length of characters in a gram. Defaults to `1`. - -`max_gram`:: - Maximum length of characters in a gram. Defaults to `2`. - -`token_chars`:: - - Character classes that should be included in a token. Elasticsearch - will split on characters that don't belong to the classes specified. - Defaults to `[]` (keep all characters). -+ -Character classes may be any of the following: -+ -* `letter` -- for example `a`, `b`, `ï` or `京` -* `digit` -- for example `3` or `7` -* `whitespace` -- for example `" "` or `"\n"` -* `punctuation` -- for example `!` or `"` -* `symbol` -- for example `$` or `√` -* `custom` -- custom characters which need to be set using the -`custom_token_chars` setting. - -`custom_token_chars`:: - - Custom characters that should be treated as part of a token. For example, - setting this to `+-_` will make the tokenizer treat the plus, minus and - underscore sign as part of a token. - -TIP: It usually makes sense to set `min_gram` and `max_gram` to the same -value. 
The smaller the length, the more documents will match but the lower -the quality of the matches. The longer the length, the more specific the -matches. A tri-gram (length `3`) is a good place to start. - -The index level setting `index.max_ngram_diff` controls the maximum allowed -difference between `max_gram` and `min_gram`. - -[discrete] -=== Example configuration - -In this example, we configure the `ngram` tokenizer to treat letters and -digits as tokens, and to produce tri-grams (grams of length `3`): - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "my_tokenizer" - } - }, - "tokenizer": { - "my_tokenizer": { - "type": "ngram", - "min_gram": 3, - "max_gram": 3, - "token_chars": [ - "letter", - "digit" - ] - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_analyzer", - "text": "2 Quick Foxes." -} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "Qui", - "start_offset": 2, - "end_offset": 5, - "type": "word", - "position": 0 - }, - { - "token": "uic", - "start_offset": 3, - "end_offset": 6, - "type": "word", - "position": 1 - }, - { - "token": "ick", - "start_offset": 4, - "end_offset": 7, - "type": "word", - "position": 2 - }, - { - "token": "Fox", - "start_offset": 8, - "end_offset": 11, - "type": "word", - "position": 3 - }, - { - "token": "oxe", - "start_offset": 9, - "end_offset": 12, - "type": "word", - "position": 4 - }, - { - "token": "xes", - "start_offset": 10, - "end_offset": 13, - "type": "word", - "position": 5 - } - ] -} ----------------------------- - -///////////////////// - - -The above example produces the following terms: - -[source,text] ---------------------------- -[ Qui, uic, ick, Fox, oxe, xes ] ---------------------------- diff --git a/docs/reference/analysis/tokenizers/pathhierarchy-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/pathhierarchy-tokenizer.asciidoc deleted file mode 100644 index 5f98807387280..0000000000000 --- a/docs/reference/analysis/tokenizers/pathhierarchy-tokenizer.asciidoc +++ /dev/null @@ -1,364 +0,0 @@ -[[analysis-pathhierarchy-tokenizer]] -=== Path hierarchy tokenizer -++++ -Path hierarchy -++++ - -The `path_hierarchy` tokenizer takes a hierarchical value like a filesystem -path, splits on the path separator, and emits a term for each component in the -tree. The `path_hierarcy` tokenizer uses Lucene's -https://lucene.apache.org/core/{lucene_version_path}/analysis/common/org/apache/lucene/analysis/path/PathHierarchyTokenizer.html[PathHierarchyTokenizer] -underneath. 
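As an illustrative sketch, the same idea works for other separator characters; the Windows-style path and the overridden `delimiter` below are hypothetical examples, not defaults:

[source,console]
----
POST _analyze
{
  "tokenizer": {
    "type": "path_hierarchy",
    "delimiter": "\\"
  },
  "text": "C:\\Users\\alice\\photos"
}
----

This request should emit one term per level of the hierarchy, from `C:` down to `C:\Users\alice\photos`.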
- -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "path_hierarchy", - "text": "/one/two/three" -} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "/one", - "start_offset": 0, - "end_offset": 4, - "type": "word", - "position": 0 - }, - { - "token": "/one/two", - "start_offset": 0, - "end_offset": 8, - "type": "word", - "position": 1 - }, - { - "token": "/one/two/three", - "start_offset": 0, - "end_offset": 14, - "type": "word", - "position": 2 - } - ] -} ----------------------------- - -///////////////////// - - - -The above text would produce the following terms: - -[source,text] ---------------------------- -[ /one, /one/two, /one/two/three ] ---------------------------- - -[discrete] -=== Configuration - -The `path_hierarchy` tokenizer accepts the following parameters: - -[horizontal] -`delimiter`:: - The character to use as the path separator. Defaults to `/`. - -`replacement`:: - An optional replacement character to use for the delimiter. - Defaults to the `delimiter`. - -`buffer_size`:: - The number of characters read into the term buffer in a single pass. - Defaults to `1024`. The term buffer will grow by this size until all the - text has been consumed. It is advisable not to change this setting. - -`reverse`:: - If `true`, uses Lucene's - http://lucene.apache.org/core/{lucene_version_path}/analysis/common/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.html[ReversePathHierarchyTokenizer], - which is suitable for domain–like hierarchies. Defaults to `false`. - -`skip`:: - The number of initial tokens to skip. Defaults to `0`. - -[discrete] -=== Example configuration - -In this example, we configure the `path_hierarchy` tokenizer to split on `-` -characters, and to replace them with `/`. The first two tokens are skipped: - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "my_tokenizer" - } - }, - "tokenizer": { - "my_tokenizer": { - "type": "path_hierarchy", - "delimiter": "-", - "replacement": "/", - "skip": 2 - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_analyzer", - "text": "one-two-three-four-five" -} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "/three", - "start_offset": 7, - "end_offset": 13, - "type": "word", - "position": 0 - }, - { - "token": "/three/four", - "start_offset": 7, - "end_offset": 18, - "type": "word", - "position": 1 - }, - { - "token": "/three/four/five", - "start_offset": 7, - "end_offset": 23, - "type": "word", - "position": 2 - } - ] -} ----------------------------- - -///////////////////// - - -The above example produces the following terms: - -[source,text] ---------------------------- -[ /three, /three/four, /three/four/five ] ---------------------------- - -If we were to set `reverse` to `true`, it would produce the following: - -[source,text] ---------------------------- -[ one/two/three/, two/three/, three/ ] ---------------------------- - -[discrete] -[[analysis-pathhierarchy-tokenizer-detailed-examples]] -=== Detailed examples - -A common use-case for the `path_hierarchy` tokenizer is filtering results by -file paths. 
If indexing a file path along with the data, the use of the -`path_hierarchy` tokenizer to analyze the path allows filtering the results -by different parts of the file path string. - - -This example configures an index to have two custom analyzers and applies -those analyzers to multifields of the `file_path` text field that will -store filenames. One of the two analyzers uses reverse tokenization. -Some sample documents are then indexed to represent some file paths -for photos inside photo folders of two different users. - - -[source,console] --------------------------------------------------- -PUT file-path-test -{ - "settings": { - "analysis": { - "analyzer": { - "custom_path_tree": { - "tokenizer": "custom_hierarchy" - }, - "custom_path_tree_reversed": { - "tokenizer": "custom_hierarchy_reversed" - } - }, - "tokenizer": { - "custom_hierarchy": { - "type": "path_hierarchy", - "delimiter": "/" - }, - "custom_hierarchy_reversed": { - "type": "path_hierarchy", - "delimiter": "/", - "reverse": "true" - } - } - } - }, - "mappings": { - "properties": { - "file_path": { - "type": "text", - "fields": { - "tree": { - "type": "text", - "analyzer": "custom_path_tree" - }, - "tree_reversed": { - "type": "text", - "analyzer": "custom_path_tree_reversed" - } - } - } - } - } -} - -POST file-path-test/_doc/1 -{ - "file_path": "/User/alice/photos/2017/05/16/my_photo1.jpg" -} - -POST file-path-test/_doc/2 -{ - "file_path": "/User/alice/photos/2017/05/16/my_photo2.jpg" -} - -POST file-path-test/_doc/3 -{ - "file_path": "/User/alice/photos/2017/05/16/my_photo3.jpg" -} - -POST file-path-test/_doc/4 -{ - "file_path": "/User/alice/photos/2017/05/15/my_photo1.jpg" -} - -POST file-path-test/_doc/5 -{ - "file_path": "/User/bob/photos/2017/05/16/my_photo1.jpg" -} --------------------------------------------------- - - -A search for a particular file path string against the text field matches all -the example documents, with Bob's documents ranking highest due to `bob` also -being one of the terms created by the standard analyzer boosting relevance for -Bob's documents. - -[source,console] --------------------------------------------------- -GET file-path-test/_search -{ - "query": { - "match": { - "file_path": "/User/bob/photos/2017/05" - } - } -} --------------------------------------------------- -// TEST[continued] - -It's simple to match or filter documents with file paths that exist within a -particular directory using the `file_path.tree` field. - -[source,console] --------------------------------------------------- -GET file-path-test/_search -{ - "query": { - "term": { - "file_path.tree": "/User/alice/photos/2017/05/16" - } - } -} --------------------------------------------------- -// TEST[continued] - -With the reverse parameter for this tokenizer, it's also possible to match -from the other end of the file path, such as individual file names or a deep -level subdirectory. The following example shows a search for all files named -`my_photo1.jpg` within any directory via the `file_path.tree_reversed` field -configured to use the reverse parameter in the mapping. - - -[source,console] --------------------------------------------------- -GET file-path-test/_search -{ - "query": { - "term": { - "file_path.tree_reversed": { - "value": "my_photo1.jpg" - } - } - } -} --------------------------------------------------- -// TEST[continued] - -Viewing the tokens generated with both forward and reverse is instructive -in showing the tokens created for the same file path value. 
- - -[source,console] --------------------------------------------------- -POST file-path-test/_analyze -{ - "analyzer": "custom_path_tree", - "text": "/User/alice/photos/2017/05/16/my_photo1.jpg" -} - -POST file-path-test/_analyze -{ - "analyzer": "custom_path_tree_reversed", - "text": "/User/alice/photos/2017/05/16/my_photo1.jpg" -} --------------------------------------------------- -// TEST[continued] - - -It's also useful to be able to filter with file paths when combined with other -types of searches, such as this example looking for any files paths with `16` -that also must be in Alice's photo directory. - -[source,console] --------------------------------------------------- -GET file-path-test/_search -{ - "query": { - "bool" : { - "must" : { - "match" : { "file_path" : "16" } - }, - "filter": { - "term" : { "file_path.tree" : "/User/alice" } - } - } - } -} --------------------------------------------------- -// TEST[continued] diff --git a/docs/reference/analysis/tokenizers/pattern-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/pattern-tokenizer.asciidoc deleted file mode 100644 index 75866dff7430d..0000000000000 --- a/docs/reference/analysis/tokenizers/pattern-tokenizer.asciidoc +++ /dev/null @@ -1,275 +0,0 @@ -[[analysis-pattern-tokenizer]] -=== Pattern tokenizer -++++ -Pattern -++++ - -The `pattern` tokenizer uses a regular expression to either split text into -terms whenever it matches a word separator, or to capture matching text as -terms. - -The default pattern is `\W+`, which splits text whenever it encounters -non-word characters. - -[WARNING] -.Beware of Pathological Regular Expressions -======================================== - -The pattern tokenizer uses -https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html[Java Regular Expressions]. - -A badly written regular expression could run very slowly or even throw a -StackOverflowError and cause the node it is running on to exit suddenly. - -Read more about https://www.regular-expressions.info/catastrophic.html[pathological regular expressions and how to avoid them]. - -======================================== - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "pattern", - "text": "The foo_bar_size's default is 5." -} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "The", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "foo_bar_size", - "start_offset": 4, - "end_offset": 16, - "type": "word", - "position": 1 - }, - { - "token": "s", - "start_offset": 17, - "end_offset": 18, - "type": "word", - "position": 2 - }, - { - "token": "default", - "start_offset": 19, - "end_offset": 26, - "type": "word", - "position": 3 - }, - { - "token": "is", - "start_offset": 27, - "end_offset": 29, - "type": "word", - "position": 4 - }, - { - "token": "5", - "start_offset": 30, - "end_offset": 31, - "type": "word", - "position": 5 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ The, foo_bar_size, s, default, is, 5 ] ---------------------------- - -[discrete] -=== Configuration - -The `pattern` tokenizer accepts the following parameters: - -[horizontal] -`pattern`:: - - A https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html[Java regular expression], defaults to `\W+`. 
- -`flags`:: - - Java regular expression https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html#field.summary[flags]. - Flags should be pipe-separated, eg `"CASE_INSENSITIVE|COMMENTS"`. - -`group`:: - - Which capture group to extract as tokens. Defaults to `-1` (split). - -[discrete] -=== Example configuration - -In this example, we configure the `pattern` tokenizer to break text into -tokens when it encounters commas: - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "my_tokenizer" - } - }, - "tokenizer": { - "my_tokenizer": { - "type": "pattern", - "pattern": "," - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_analyzer", - "text": "comma,separated,values" -} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "comma", - "start_offset": 0, - "end_offset": 5, - "type": "word", - "position": 0 - }, - { - "token": "separated", - "start_offset": 6, - "end_offset": 15, - "type": "word", - "position": 1 - }, - { - "token": "values", - "start_offset": 16, - "end_offset": 22, - "type": "word", - "position": 2 - } - ] -} ----------------------------- - -///////////////////// - - -The above example produces the following terms: - -[source,text] ---------------------------- -[ comma, separated, values ] ---------------------------- - -In the next example, we configure the `pattern` tokenizer to capture values -enclosed in double quotes (ignoring embedded escaped quotes `\"`). The regex -itself looks like this: - - "((?:\\"|[^"]|\\")*)" - -And reads as follows: - -* A literal `"` -* Start capturing: -** A literal `\"` OR any character except `"` -** Repeat until no more characters match -* A literal closing `"` - -When the pattern is specified in JSON, the `"` and `\` characters need to be -escaped, so the pattern ends up looking like: - - \"((?:\\\\\"|[^\"]|\\\\\")+)\" - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "my_tokenizer" - } - }, - "tokenizer": { - "my_tokenizer": { - "type": "pattern", - "pattern": "\"((?:\\\\\"|[^\"]|\\\\\")+)\"", - "group": 1 - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_analyzer", - "text": "\"value\", \"value with embedded \\\" quote\"" -} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "value", - "start_offset": 1, - "end_offset": 6, - "type": "word", - "position": 0 - }, - { - "token": "value with embedded \\\" quote", - "start_offset": 10, - "end_offset": 38, - "type": "word", - "position": 1 - } - ] -} ----------------------------- - -///////////////////// - -The above example produces the following two terms: - -[source,text] ---------------------------- -[ value, value with embedded \" quote ] ---------------------------- diff --git a/docs/reference/analysis/tokenizers/simplepattern-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/simplepattern-tokenizer.asciidoc deleted file mode 100644 index 3e3de000275f3..0000000000000 --- a/docs/reference/analysis/tokenizers/simplepattern-tokenizer.asciidoc +++ /dev/null @@ -1,104 +0,0 @@ -[[analysis-simplepattern-tokenizer]] -=== Simple pattern tokenizer -++++ -Simple pattern -++++ - -The `simple_pattern` tokenizer uses a regular expression to capture 
matching -text as terms. The set of regular expression features it supports is more -limited than the <> tokenizer, but the -tokenization is generally faster. - -This tokenizer does not support splitting the input on a pattern match, unlike -the <> tokenizer. To split on pattern -matches using the same restricted regular expression subset, see the -<> tokenizer. - -This tokenizer uses {lucene-core-javadoc}/org/apache/lucene/util/automaton/RegExp.html[Lucene regular expressions]. -For an explanation of the supported features and syntax, see <>. - -The default pattern is the empty string, which produces no terms. This -tokenizer should always be configured with a non-default pattern. - -[discrete] -=== Configuration - -The `simple_pattern` tokenizer accepts the following parameters: - -[horizontal] -`pattern`:: - {lucene-core-javadoc}/org/apache/lucene/util/automaton/RegExp.html[Lucene regular expression], defaults to the empty string. - -[discrete] -=== Example configuration - -This example configures the `simple_pattern` tokenizer to produce terms that are -three-digit numbers - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "my_tokenizer" - } - }, - "tokenizer": { - "my_tokenizer": { - "type": "simple_pattern", - "pattern": "[0123456789]{3}" - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_analyzer", - "text": "fd-786-335-514-x" -} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens" : [ - { - "token" : "786", - "start_offset" : 3, - "end_offset" : 6, - "type" : "word", - "position" : 0 - }, - { - "token" : "335", - "start_offset" : 7, - "end_offset" : 10, - "type" : "word", - "position" : 1 - }, - { - "token" : "514", - "start_offset" : 11, - "end_offset" : 14, - "type" : "word", - "position" : 2 - } - ] -} ----------------------------- - -///////////////////// - -The above example produces these terms: - -[source,text] ---------------------------- -[ 786, 335, 514 ] ---------------------------- diff --git a/docs/reference/analysis/tokenizers/simplepatternsplit-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/simplepatternsplit-tokenizer.asciidoc deleted file mode 100644 index 0bf82fa39605e..0000000000000 --- a/docs/reference/analysis/tokenizers/simplepatternsplit-tokenizer.asciidoc +++ /dev/null @@ -1,105 +0,0 @@ -[[analysis-simplepatternsplit-tokenizer]] -=== Simple pattern split tokenizer -++++ -Simple pattern split -++++ - -The `simple_pattern_split` tokenizer uses a regular expression to split the -input into terms at pattern matches. The set of regular expression features it -supports is more limited than the <> -tokenizer, but the tokenization is generally faster. - -This tokenizer does not produce terms from the matches themselves. To produce -terms from matches using patterns in the same restricted regular expression -subset, see the <> -tokenizer. - -This tokenizer uses {lucene-core-javadoc}/org/apache/lucene/util/automaton/RegExp.html[Lucene regular expressions]. -For an explanation of the supported features and syntax, see <>. - -The default pattern is the empty string, which produces one term containing the -full input. This tokenizer should always be configured with a non-default -pattern. 
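Before wiring the tokenizer into an index, it can also be tried inline with the `_analyze` API. In this sketch the pattern and sample text are arbitrary:

[source,console]
----
POST _analyze
{
  "tokenizer": {
    "type": "simple_pattern_split",
    "pattern": "-"
  },
  "text": "2024-05-16"
}
----

Splitting on the literal `-` should produce the terms `2024`, `05`, and `16`.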
- -[discrete] -=== Configuration - -The `simple_pattern_split` tokenizer accepts the following parameters: - -[horizontal] -`pattern`:: - A {lucene-core-javadoc}/org/apache/lucene/util/automaton/RegExp.html[Lucene regular expression], defaults to the empty string. - -[discrete] -=== Example configuration - -This example configures the `simple_pattern_split` tokenizer to split the input -text on underscores. - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "my_tokenizer" - } - }, - "tokenizer": { - "my_tokenizer": { - "type": "simple_pattern_split", - "pattern": "_" - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_analyzer", - "text": "an_underscored_phrase" -} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens" : [ - { - "token" : "an", - "start_offset" : 0, - "end_offset" : 2, - "type" : "word", - "position" : 0 - }, - { - "token" : "underscored", - "start_offset" : 3, - "end_offset" : 14, - "type" : "word", - "position" : 1 - }, - { - "token" : "phrase", - "start_offset" : 15, - "end_offset" : 21, - "type" : "word", - "position" : 2 - } - ] -} ----------------------------- - -///////////////////// - -The above example produces these terms: - -[source,text] ---------------------------- -[ an, underscored, phrase ] ---------------------------- diff --git a/docs/reference/analysis/tokenizers/standard-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/standard-tokenizer.asciidoc deleted file mode 100644 index 2ea16ea5f6a26..0000000000000 --- a/docs/reference/analysis/tokenizers/standard-tokenizer.asciidoc +++ /dev/null @@ -1,268 +0,0 @@ -[[analysis-standard-tokenizer]] -=== Standard tokenizer -++++ -Standard -++++ - -The `standard` tokenizer provides grammar based tokenization (based on the -Unicode Text Segmentation algorithm, as specified in -https://unicode.org/reports/tr29/[Unicode Standard Annex #29]) and works well -for most languages. - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "standard", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." 
-} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "The", - "start_offset": 0, - "end_offset": 3, - "type": "", - "position": 0 - }, - { - "token": "2", - "start_offset": 4, - "end_offset": 5, - "type": "", - "position": 1 - }, - { - "token": "QUICK", - "start_offset": 6, - "end_offset": 11, - "type": "", - "position": 2 - }, - { - "token": "Brown", - "start_offset": 12, - "end_offset": 17, - "type": "", - "position": 3 - }, - { - "token": "Foxes", - "start_offset": 18, - "end_offset": 23, - "type": "", - "position": 4 - }, - { - "token": "jumped", - "start_offset": 24, - "end_offset": 30, - "type": "", - "position": 5 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "", - "position": 6 - }, - { - "token": "the", - "start_offset": 36, - "end_offset": 39, - "type": "", - "position": 7 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "", - "position": 8 - }, - { - "token": "dog's", - "start_offset": 45, - "end_offset": 50, - "type": "", - "position": 9 - }, - { - "token": "bone", - "start_offset": 51, - "end_offset": 55, - "type": "", - "position": 10 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ The, 2, QUICK, Brown, Foxes, jumped, over, the, lazy, dog's, bone ] ---------------------------- - -[discrete] -=== Configuration - -The `standard` tokenizer accepts the following parameters: - -[horizontal] -`max_token_length`:: - - The maximum token length. If a token is seen that exceeds this length then - it is split at `max_token_length` intervals. Defaults to `255`. - -[discrete] -=== Example configuration - -In this example, we configure the `standard` tokenizer to have a -`max_token_length` of 5 (for demonstration purposes): - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "my_tokenizer" - } - }, - "tokenizer": { - "my_tokenizer": { - "type": "standard", - "max_token_length": 5 - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_analyzer", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." 
-} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "The", - "start_offset": 0, - "end_offset": 3, - "type": "", - "position": 0 - }, - { - "token": "2", - "start_offset": 4, - "end_offset": 5, - "type": "", - "position": 1 - }, - { - "token": "QUICK", - "start_offset": 6, - "end_offset": 11, - "type": "", - "position": 2 - }, - { - "token": "Brown", - "start_offset": 12, - "end_offset": 17, - "type": "", - "position": 3 - }, - { - "token": "Foxes", - "start_offset": 18, - "end_offset": 23, - "type": "", - "position": 4 - }, - { - "token": "jumpe", - "start_offset": 24, - "end_offset": 29, - "type": "", - "position": 5 - }, - { - "token": "d", - "start_offset": 29, - "end_offset": 30, - "type": "", - "position": 6 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "", - "position": 7 - }, - { - "token": "the", - "start_offset": 36, - "end_offset": 39, - "type": "", - "position": 8 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "", - "position": 9 - }, - { - "token": "dog's", - "start_offset": 45, - "end_offset": 50, - "type": "", - "position": 10 - }, - { - "token": "bone", - "start_offset": 51, - "end_offset": 55, - "type": "", - "position": 11 - } - ] -} ----------------------------- - -///////////////////// - - -The above example produces the following terms: - -[source,text] ---------------------------- -[ The, 2, QUICK, Brown, Foxes, jumpe, d, over, the, lazy, dog's, bone ] ---------------------------- diff --git a/docs/reference/analysis/tokenizers/thai-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/thai-tokenizer.asciidoc deleted file mode 100644 index f1cc77c5b085a..0000000000000 --- a/docs/reference/analysis/tokenizers/thai-tokenizer.asciidoc +++ /dev/null @@ -1,107 +0,0 @@ -[[analysis-thai-tokenizer]] -=== Thai tokenizer -++++ -Thai -++++ - -The `thai` tokenizer segments Thai text into words, using the Thai -segmentation algorithm included with Java. Text in other languages in general -will be treated the same as the -<>. - -WARNING: This tokenizer may not be supported by all JREs. It is known to work -with Sun/Oracle and OpenJDK. If your application needs to be fully portable, -consider using the {plugins}/analysis-icu-tokenizer.html[ICU Tokenizer] instead. 
- -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "thai", - "text": "การที่ได้ต้องแสดงว่างานดี" -} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "การ", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "ที่", - "start_offset": 3, - "end_offset": 6, - "type": "word", - "position": 1 - }, - { - "token": "ได้", - "start_offset": 6, - "end_offset": 9, - "type": "word", - "position": 2 - }, - { - "token": "ต้อง", - "start_offset": 9, - "end_offset": 13, - "type": "word", - "position": 3 - }, - { - "token": "แสดง", - "start_offset": 13, - "end_offset": 17, - "type": "word", - "position": 4 - }, - { - "token": "ว่า", - "start_offset": 17, - "end_offset": 20, - "type": "word", - "position": 5 - }, - { - "token": "งาน", - "start_offset": 20, - "end_offset": 23, - "type": "word", - "position": 6 - }, - { - "token": "ดี", - "start_offset": 23, - "end_offset": 25, - "type": "word", - "position": 7 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ การ, ที่, ได้, ต้อง, แสดง, ว่า, งาน, ดี ] ---------------------------- - -[discrete] -=== Configuration - -The `thai` tokenizer is not configurable. diff --git a/docs/reference/analysis/tokenizers/uaxurlemail-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/uaxurlemail-tokenizer.asciidoc deleted file mode 100644 index 4ec3a035c54c1..0000000000000 --- a/docs/reference/analysis/tokenizers/uaxurlemail-tokenizer.asciidoc +++ /dev/null @@ -1,196 +0,0 @@ -[[analysis-uaxurlemail-tokenizer]] -=== UAX URL email tokenizer -++++ -UAX URL email -++++ - -The `uax_url_email` tokenizer is like the <> except that it -recognises URLs and email addresses as single tokens. - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "uax_url_email", - "text": "Email me at john.smith@global-international.com" -} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "Email", - "start_offset": 0, - "end_offset": 5, - "type": "", - "position": 0 - }, - { - "token": "me", - "start_offset": 6, - "end_offset": 8, - "type": "", - "position": 1 - }, - { - "token": "at", - "start_offset": 9, - "end_offset": 11, - "type": "", - "position": 2 - }, - { - "token": "john.smith@global-international.com", - "start_offset": 12, - "end_offset": 47, - "type": "", - "position": 3 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ Email, me, at, john.smith@global-international.com ] ---------------------------- - -while the `standard` tokenizer would produce: - -[source,text] ---------------------------- -[ Email, me, at, john.smith, global, international.com ] ---------------------------- - -[discrete] -=== Configuration - -The `uax_url_email` tokenizer accepts the following parameters: - -[horizontal] -`max_token_length`:: - - The maximum token length. If a token is seen that exceeds this length then - it is split at `max_token_length` intervals. Defaults to `255`. 
- -[discrete] -=== Example configuration - -In this example, we configure the `uax_url_email` tokenizer to have a -`max_token_length` of 5 (for demonstration purposes): - -[source,console] ----------------------------- -PUT my-index-000001 -{ - "settings": { - "analysis": { - "analyzer": { - "my_analyzer": { - "tokenizer": "my_tokenizer" - } - }, - "tokenizer": { - "my_tokenizer": { - "type": "uax_url_email", - "max_token_length": 5 - } - } - } - } -} - -POST my-index-000001/_analyze -{ - "analyzer": "my_analyzer", - "text": "john.smith@global-international.com" -} ----------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "john", - "start_offset": 0, - "end_offset": 4, - "type": "", - "position": 0 - }, - { - "token": "smith", - "start_offset": 5, - "end_offset": 10, - "type": "", - "position": 1 - }, - { - "token": "globa", - "start_offset": 11, - "end_offset": 16, - "type": "", - "position": 2 - }, - { - "token": "l", - "start_offset": 16, - "end_offset": 17, - "type": "", - "position": 3 - }, - { - "token": "inter", - "start_offset": 18, - "end_offset": 23, - "type": "", - "position": 4 - }, - { - "token": "natio", - "start_offset": 23, - "end_offset": 28, - "type": "", - "position": 5 - }, - { - "token": "nal.c", - "start_offset": 28, - "end_offset": 33, - "type": "", - "position": 6 - }, - { - "token": "om", - "start_offset": 33, - "end_offset": 35, - "type": "", - "position": 7 - } - ] -} ----------------------------- - -///////////////////// - - -The above example produces the following terms: - -[source,text] ---------------------------- -[ john, smith, globa, l, inter, natio, nal.c, om ] ---------------------------- diff --git a/docs/reference/analysis/tokenizers/whitespace-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/whitespace-tokenizer.asciidoc deleted file mode 100644 index 525c4bda4fa9a..0000000000000 --- a/docs/reference/analysis/tokenizers/whitespace-tokenizer.asciidoc +++ /dev/null @@ -1,121 +0,0 @@ -[[analysis-whitespace-tokenizer]] -=== Whitespace tokenizer -++++ -Whitespace -++++ - -The `whitespace` tokenizer breaks text into terms whenever it encounters a -whitespace character. - -[discrete] -=== Example output - -[source,console] ---------------------------- -POST _analyze -{ - "tokenizer": "whitespace", - "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." 
-} ---------------------------- - -///////////////////// - -[source,console-result] ----------------------------- -{ - "tokens": [ - { - "token": "The", - "start_offset": 0, - "end_offset": 3, - "type": "word", - "position": 0 - }, - { - "token": "2", - "start_offset": 4, - "end_offset": 5, - "type": "word", - "position": 1 - }, - { - "token": "QUICK", - "start_offset": 6, - "end_offset": 11, - "type": "word", - "position": 2 - }, - { - "token": "Brown-Foxes", - "start_offset": 12, - "end_offset": 23, - "type": "word", - "position": 3 - }, - { - "token": "jumped", - "start_offset": 24, - "end_offset": 30, - "type": "word", - "position": 4 - }, - { - "token": "over", - "start_offset": 31, - "end_offset": 35, - "type": "word", - "position": 5 - }, - { - "token": "the", - "start_offset": 36, - "end_offset": 39, - "type": "word", - "position": 6 - }, - { - "token": "lazy", - "start_offset": 40, - "end_offset": 44, - "type": "word", - "position": 7 - }, - { - "token": "dog's", - "start_offset": 45, - "end_offset": 50, - "type": "word", - "position": 8 - }, - { - "token": "bone.", - "start_offset": 51, - "end_offset": 56, - "type": "word", - "position": 9 - } - ] -} ----------------------------- - -///////////////////// - - -The above sentence would produce the following terms: - -[source,text] ---------------------------- -[ The, 2, QUICK, Brown-Foxes, jumped, over, the, lazy, dog's, bone. ] ---------------------------- - -[discrete] -=== Configuration - -The `whitespace` tokenizer accepts the following parameters: - -[horizontal] -`max_token_length`:: - - The maximum token length. If a token is seen that exceeds this length then - it is split at `max_token_length` intervals. Defaults to `255`. diff --git a/docs/reference/api-conventions.asciidoc b/docs/reference/api-conventions.asciidoc deleted file mode 100644 index 545b50df009d3..0000000000000 --- a/docs/reference/api-conventions.asciidoc +++ /dev/null @@ -1,439 +0,0 @@ -[[api-conventions]] -== API conventions - -The {es} REST APIs are exposed over HTTP. -Except where noted, the following conventions apply across all APIs. - -[discrete] -=== Content-type requirements - -The type of the content sent in a request body must be specified using -the `Content-Type` header. The value of this header must map to one of -the supported formats that the API supports. Most APIs support JSON, -YAML, CBOR, and SMILE. The bulk and multi-search APIs support NDJSON, -JSON, and SMILE; other types will result in an error response. - -When using the `source` query string parameter, the content type must be -specified using the `source_content_type` query string parameter. - -{es} only supports UTF-8-encoded JSON. {es} ignores any other encoding headings -sent with a request. Responses are also UTF-8 encoded. - -[discrete] -[[x-opaque-id]] -=== `X-Opaque-Id` HTTP header - -You can pass an `X-Opaque-Id` HTTP header to track the origin of a request in -{es} logs and tasks. If provided, {es} surfaces the `X-Opaque-Id` value in the: - -* Response of any request that includes the header -* <<_identifying_running_tasks,Task management API>> response -* <> -* <> - -For the deprecation logs, {es} also uses the `X-Opaque-Id` value to throttle -and deduplicate deprecation warnings. See <<_deprecation_logs_throttling>>. - -The `X-Opaque-Id` header accepts any arbitrary value. However, we recommend you -limit these values to a finite set, such as an ID per client. Don't generate a -unique `X-Opaque-Id` header for every request. 
Too many unique `X-Opaque-Id` -values can prevent {es} from deduplicating warnings in the deprecation logs. - -[discrete] -[[traceparent]] -=== `traceparent` HTTP header - -{es} also supports a `traceparent` HTTP header using the -https://www.w3.org/TR/trace-context/#traceparent-header[official W3C trace -context spec]. You can use the `traceparent` header to trace requests across -Elastic products and other services. Because it's only used for traces, you can -safely generate a unique `traceparent` header for each request. - -If provided, {es} surfaces the header's `trace-id` value as `trace.id` in the: - -* <> -* <> -* <> - -For example, the following `traceparent` value would produce the following -`trace.id` value in the above logs. - -[source,txt] ----- -`traceparent`: 00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01 -`trace.id`: 0af7651916cd43dd8448eb211c80319c ----- - -[discrete] -[[get-requests]] -=== GET and POST requests - -A number of {es} GET APIs--most notably the search API--support a request body. -While the GET action makes sense in the context of retrieving information, -GET requests with a body are not supported by all HTTP libraries. -All {es} GET APIs that require a body can also be submitted as POST requests. -Alternatively, you can pass the request body as the -<> -when using GET. - -include::rest-api/cron-expressions.asciidoc[] - -[discrete] -[[api-date-math-index-names]] -=== Date math support in index and index alias names - -Date math name resolution lets you to search a range of time series indices or -index aliases rather than searching all of your indices and filtering the -results. Limiting the number of searched indices reduces cluster load and -improves search performance. For example, if you are searching for errors in -your daily logs, you can use a date math name template to restrict the search to -the past two days. - -Most APIs that accept an index or index alias argument support date math. A date -math name takes the following form: - -[source,txt] ----------------------------------------------------------------------- - ----------------------------------------------------------------------- - -Where: - -[horizontal] -`static_name`:: Static text -`date_math_expr`:: Dynamic date math expression that computes the date dynamically -`date_format`:: Optional format in which the computed date should be rendered. Defaults to `yyyy.MM.dd`. Format should be compatible with java-time https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html -`time_zone`:: Optional time zone. Defaults to `UTC`. - -NOTE: Pay attention to the usage of small vs capital letters used in the `date_format`. For example: -`mm` denotes minute of hour, while `MM` denotes month of year. Similarly `hh` denotes the hour in the -`1-12` range in combination with `AM/PM`, while `HH` denotes the hour in the `0-23` 24-hour range. - -Date math expressions are resolved locale-independent. Consequently, it is not possible to use any other -calendars than the Gregorian calendar. - -You must enclose date math names in angle brackets. If you use the name in a -request path, special characters must be URI encoded. 
For example: - -[source,console] ----- -# PUT / -PUT /%3Cmy-index-%7Bnow%2Fd%7D%3E ----- - -[NOTE] -.Percent encoding of date math characters -====================================================== -The special characters used for date rounding must be URI encoded as follows: - -[horizontal] -`<`:: `%3C` -`>`:: `%3E` -`/`:: `%2F` -`{`:: `%7B` -`}`:: `%7D` -`|`:: `%7C` -`+`:: `%2B` -`:`:: `%3A` -`,`:: `%2C` -====================================================== - -The following example shows different forms of date math names and the final names -they resolve to given the current time is 22nd March 2024 noon UTC. - -[options="header"] -|====== -| Expression |Resolves to -| `` | `logstash-2024.03.22` -| `` | `logstash-2024.03.01` -| `` | `logstash-2024.03` -| `` | `logstash-2024.02` -| `` | `logstash-2024.03.23` -|====== - -To use the characters `{` and `}` in the static part of a name template, escape them -with a backslash `\`, for example: - - * `` resolves to `elastic{ON}-2024.03.01` - -The following example shows a search request that searches the Logstash indices for the past -three days, assuming the indices use the default Logstash index name format, -`logstash-YYYY.MM.dd`. - -[source,console] ----------------------------------------------------------------------- -# GET /,,/_search -GET /%3Clogstash-%7Bnow%2Fd-2d%7D%3E%2C%3Clogstash-%7Bnow%2Fd-1d%7D%3E%2C%3Clogstash-%7Bnow%2Fd%7D%3E/_search -{ - "query" : { - "match": { - "test": "data" - } - } -} ----------------------------------------------------------------------- -// TEST[s/^/PUT logstash-2016.09.20\nPUT logstash-2016.09.19\nPUT logstash-2016.09.18\n/] -// TEST[s/now/2016.09.20%7C%7C/] - -[discrete] -[[api-multi-index]] -=== Multi-target syntax - -Most APIs that accept a ``, ``, or `` request path -parameter also support _multi-target syntax_. - -In multi-target syntax, you can use a comma-separated list to run a request on -multiple resources, such as data streams, indices, or aliases: -`test1,test2,test3`. You can also use {wikipedia}/Glob_(programming)[glob-like] -wildcard (`*`) expressions to target resources that match a pattern: `test*` or -`*test` or `te*t` or `*test*`. - -You can exclude targets using the `-` character: `test*,-test3`. - -IMPORTANT: Aliases are resolved after wildcard expressions. This can result in a -request that targets an excluded alias. For example, if `test3` is an index -alias, the pattern `test*,-test3` still targets the indices for `test3`. To -avoid this, exclude the concrete indices for the alias instead. - -You can also exclude clusters from a list of clusters to search using the `-` character: -`remote*:*,-remote1:*,-remote4:*` will search all clusters with an alias that starts -with "remote" except for "remote1" and "remote4". Note that to exclude a cluster -with this notation you must exclude all of its indexes. Excluding a subset of indexes -on a remote cluster is currently not supported. For example, this will throw an exception: -`remote*:*,-remote1:logs*`. - -Multi-target APIs that can target indices support the following query -string parameters: - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=index-ignore-unavailable] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=allow-no-indices] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=expand-wildcards] - -The defaults settings for the above parameters depend on the API being used. 
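For illustration, here is a hedged sketch of a single search request that combines the multi-target forms described above (a comma-separated list, a wildcard pattern, an exclusion) with two of the query string parameters. The index names are hypothetical.

[source,console]
----
# Search two concrete indices plus everything matching logs-2024.*,
# excluding logs-2024.03 and ignoring any targets that do not exist
GET /my-index-000001,my-index-000002,logs-2024.*,-logs-2024.03/_search?ignore_unavailable=true&expand_wildcards=open
----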
- -Some multi-target APIs that can target indices also support the following query -string parameter: - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=ignore_throttled] - -NOTE: APIs with a single target, such as the <>, do -not support multi-target syntax. - -[discrete] -[[multi-hidden]] -==== Hidden data streams and indices - -For most APIs, wildcard expressions do not match hidden data streams and indices -by default. To match hidden data streams and indices using a wildcard -expression, you must specify the `expand_wildcards` query parameter. - -Alternatively, querying an index pattern starting with a dot, such as -`.watcher_hist*`, will match hidden indices by default. This is intended to -mirror Unix file-globbing behavior and provide a smoother transition path to -hidden indices. - -You can create hidden data streams by setting `data_stream.hidden` to `true` in -the stream's matching <>. You can hide -indices using the <> index setting. - -The backing indices for data streams are hidden automatically. Some features, -such as {ml}, store information in hidden indices. - -Global index templates that match all indices are not applied to hidden indices. - -[discrete] -[[system-indices]] -==== System indices - -{es} modules and plugins can store configuration and state information in internal _system indices_. -You should not directly access or modify system indices -as they contain data essential to the operation of the system. - -IMPORTANT: Direct access to system indices is deprecated and -will no longer be allowed in a future major version. - -To view system indices within cluster: - -[source,console] --------------------------------------------------- -GET _cluster/state/metadata?filter_path=metadata.indices.*.system --------------------------------------------------- - -WARNING: When overwriting current cluster state, system indices should be restored -as part of their {ref}/snapshot-restore.html#feature-state[feature state]. - -[discrete] -[[api-conventions-parameters]] -=== Parameters - -Rest parameters (when using HTTP, map to HTTP URL parameters) follow the -convention of using underscore casing. - -[discrete] -[[api-request-body-query-string]] -=== Request body in query string - -For libraries that don't accept a request body for non-POST requests, -you can pass the request body as the `source` query string parameter -instead. When using this method, the `source_content_type` parameter -should also be passed with a media type value that indicates the format -of the source, such as `application/json`. - -[discrete] -[[api-compatibility]] -=== REST API version compatibility - -Major version upgrades often include a number of breaking changes -that impact how you interact with {es}. -While we recommend that you monitor the deprecation logs and -update applications before upgrading {es}, -having to coordinate the necessary changes can be an impediment to upgrading. - -You can enable an existing application to function without modification after -an upgrade by including API compatibility headers, which tell {es} you are still -using the previous version of the REST API. Using these headers allows the -structure of requests and responses to remain the same; it does not guarantee -the same behavior. - - -You set version compatibility on a per-request basis in the `Content-Type` and `Accept` headers. -Setting `compatible-with` to the same major version as -the version you're running has no impact, -but ensures that the request will still work after {es} is upgraded. 
- -To tell {es} 8.0 you are using the 7.x request and response format, -set `compatible-with=7`: - -[source,sh] ----------------------------------------------------------------------- -Content-Type: application/vnd.elasticsearch+json; compatible-with=7 -Accept: application/vnd.elasticsearch+json; compatible-with=7 ----------------------------------------------------------------------- - -[discrete] -[[api-push-back]] -=== HTTP `429 Too Many Requests` status code push back - -{es} APIs may respond with the HTTP `429 Too Many Requests` status code, indicating that the cluster is too busy -to handle the request. When this happens, consider retrying after a short delay. If the retry also receives -a `429 Too Many Requests` response, extend the delay by backing off exponentially before each subsequent retry. - -[discrete] -[[api-url-access-control]] -=== URL-based access control - -Many users use a proxy with URL-based access control to secure access to -{es} data streams and indices. For <>, -<>, and <> requests, the user has -the choice of specifying a data stream or index in the URL and on each individual request -within the request body. This can make URL-based access control challenging. - -To prevent the user from overriding the data stream or index specified in the -URL, set `rest.action.multi.allow_explicit_index` to `false` in `elasticsearch.yml`. - - -This causes {es} to -reject requests that explicitly specify a data stream or index in the request body. - -[discrete] -=== Boolean Values - -All REST API parameters (both request parameters and JSON body) support -providing boolean "false" as the value `false` and boolean "true" as the -value `true`. All other values will raise an error. - -[[api-conventions-number-values]] -[discrete] -=== Number Values - -When passing a numeric parameter in a request body, you may use a `string` -containing the number instead of the native numeric type. For example: - -[source,console] --------------------------------------------------- -POST /_search -{ - "size": "1000" -} --------------------------------------------------- - -Integer-valued fields in a response body are described as `integer` (or -occasionally `long`) in this manual, but there are generally no explicit bounds -on such values. JSON, SMILE, CBOR and YAML all permit arbitrarily large integer -values. Do not assume that `integer` fields in a response body will always fit -into a 32-bit signed integer. - -[[byte-units]] -[discrete] -=== Byte size units - -Whenever the byte size of data needs to be specified, e.g. when setting a buffer size -parameter, the value must specify the unit, like `10kb` for 10 kilobytes. Note that -these units use powers of 1024, so `1kb` means 1024 bytes. The supported units are: - -[horizontal] -`b`:: Bytes -`kb`:: Kilobytes -`mb`:: Megabytes -`gb`:: Gigabytes -`tb`:: Terabytes -`pb`:: Petabytes - -[[distance-units]] -[discrete] -=== Distance Units - -Wherever distances need to be specified, such as the `distance` parameter in -the <>), the default unit is meters if none is specified. -Distances can be specified in other units, such as `"1km"` or -`"2mi"` (2 miles). 
- -The full list of units is listed below: - -[horizontal] -Mile:: `mi` or `miles` -Yard:: `yd` or `yards` -Feet:: `ft` or `feet` -Inch:: `in` or `inch` -Kilometer:: `km` or `kilometers` -Meter:: `m` or `meters` -Centimeter:: `cm` or `centimeters` -Millimeter:: `mm` or `millimeters` -Nautical mile:: `NM`, `nmi`, or `nauticalmiles` - -[discrete] -[[time-units]] -=== Time units - -Whenever durations need to be specified, e.g. for a `timeout` parameter, the duration must specify -the unit, like `2d` for 2 days. The supported units are: - -[horizontal] -`d`:: Days -`h`:: Hours -`m`:: Minutes -`s`:: Seconds -`ms`:: Milliseconds -`micros`:: Microseconds -`nanos`:: Nanoseconds - -[[size-units]] -[discrete] -=== Unit-less quantities - -Unit-less quantities means that they don't have a "unit" like "bytes" or "Hertz" or "meter" or "long tonne". - -If one of these quantities is large we'll print it out like 10m for 10,000,000 or 7k for 7,000. We'll still print 87 -when we mean 87 though. These are the supported multipliers: - -[horizontal] -`k`:: Kilo -`m`:: Mega -`g`:: Giga -`t`:: Tera -`p`:: Peta - - - diff --git a/docs/reference/autoscaling/apis/autoscaling-apis.asciidoc b/docs/reference/autoscaling/apis/autoscaling-apis.asciidoc deleted file mode 100644 index 87de3818bfaf8..0000000000000 --- a/docs/reference/autoscaling/apis/autoscaling-apis.asciidoc +++ /dev/null @@ -1,30 +0,0 @@ -[role="xpack"] -[[autoscaling-apis]] -== Autoscaling APIs - -NOTE: {cloud-only} - - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-autoscaling[Autoscaling APIs]. --- - -You can use the following APIs to perform {cloud}/ec-autoscaling.html[autoscaling operations]. - -[discrete] -[[autoscaling-api-top-level]] -=== Top-Level - -* <> -* <> -* <> -* <> - -// top-level -include::put-autoscaling-policy.asciidoc[] -include::get-autoscaling-capacity.asciidoc[] -include::delete-autoscaling-policy.asciidoc[] -include::get-autoscaling-policy.asciidoc[] - diff --git a/docs/reference/autoscaling/apis/delete-autoscaling-policy.asciidoc b/docs/reference/autoscaling/apis/delete-autoscaling-policy.asciidoc deleted file mode 100644 index 349e40aab0540..0000000000000 --- a/docs/reference/autoscaling/apis/delete-autoscaling-policy.asciidoc +++ /dev/null @@ -1,97 +0,0 @@ -[role="xpack"] -[[autoscaling-delete-autoscaling-policy]] -=== Delete autoscaling policy API -++++ -Delete autoscaling policy -++++ - -NOTE: {cloud-only} - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-autoscaling[Autoscaling APIs]. --- - -Delete {cloud}/ec-autoscaling.html[autoscaling] policy. - -[[autoscaling-delete-autoscaling-policy-request]] -==== {api-request-title} - -////////////////////////// -[source,console] --------------------------------------------------- -PUT /_autoscaling/policy/my_autoscaling_policy -{ - "roles": [], - "deciders": { - "fixed": { - } - } -} --------------------------------------------------- -// TESTSETUP -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_autoscaling/policy/ --------------------------------------------------- -// TEST[s//my_autoscaling_policy/] - -[[autoscaling-delete-autoscaling-policy-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the -`manage_autoscaling` <> to use this -API. - -* If the <> is enabled, only operator -users can use this API. 
- -[[autoscaling-delete-autoscaling-policy-desc]] -==== {api-description-title} - -This API deletes an autoscaling policy with the provided name. - -[[autoscaling-delete-autoscaling-policy-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] - -[[autoscaling-delete-autoscaling-policy-examples]] -==== {api-examples-title} - -This example deletes an autoscaling policy named `my_autoscaling_policy`. - -[source,console] --------------------------------------------------- -DELETE /_autoscaling/policy/my_autoscaling_policy --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged": true -} --------------------------------------------------- - -This example deletes all autoscaling policies. - -[source,console] --------------------------------------------------- -DELETE /_autoscaling/policy/* --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged": true -} --------------------------------------------------- diff --git a/docs/reference/autoscaling/apis/get-autoscaling-capacity.asciidoc b/docs/reference/autoscaling/apis/get-autoscaling-capacity.asciidoc deleted file mode 100644 index d45f7cbacc242..0000000000000 --- a/docs/reference/autoscaling/apis/get-autoscaling-capacity.asciidoc +++ /dev/null @@ -1,280 +0,0 @@ -[role="xpack"] -[[autoscaling-get-autoscaling-capacity]] -=== Get autoscaling capacity API -++++ -Get autoscaling capacity -++++ - -NOTE: {cloud-only} - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-autoscaling[Autoscaling APIs]. --- - -Get {cloud}/ec-autoscaling.html[autoscaling] capacity. - -[[autoscaling-get-autoscaling-capacity-request]] -==== {api-request-title} - -[source,console] --------------------------------------------------- -GET /_autoscaling/capacity/ --------------------------------------------------- -// TEST - -[[autoscaling-get-autoscaling-capacity-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have -`manage_autoscaling` cluster privileges. For more information, see -<>. - -[[autoscaling-get-autoscaling-capacity-desc]] -==== {api-description-title} - -This API gets the current autoscaling capacity based on the configured -autoscaling policy. This API will return information to size the cluster -appropriately to the current workload. - -The `required_capacity` is calculated as the max of the `required_capacity` -result of all individual deciders that are enabled for the policy. - -The operator should verify that the `current_nodes` match -the operator's knowledge of the cluster to avoid making autoscaling decisions -based on stale or incomplete information. - -The response contains decider-specific information you can use to diagnose how -and why autoscaling determined a certain capacity was required. This information -is provided for diagnosis only. Do not use this information to make autoscaling -decisions. 
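As a rough illustration of the shape of such a response (not a tested example: the policy name and all numeric values are hypothetical, elided objects are marked with `...`, and the authoritative field reference follows below), a populated result might look similar to this:

[source,js]
----
{
  "policies": {
    "my_autoscaling_policy": {
      "required_capacity": {
        "node":  { "storage": 268435456000,  "memory": 34359738368,  "processors": 8.0 },
        "total": { "storage": 1073741824000, "memory": 137438953472, "processors": 32.0 }
      },
      "current_capacity": { ... },
      "current_nodes": [ ... ],
      "deciders": { ... }
    }
  }
}
----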
- -[[autoscaling-get-autoscaling-capacity-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[role="child_attributes"] -[[autoscaling-get-autoscaling-capacity-api-response-body]] -==== {api-response-body-title} - -`policies`:: -(object) -Contains the map of policy name to capacity result -+ -.Properties of `policies` -[%collapsible%open] -==== -``:: -(object) -Contains capacity information for the policy -+ -.Properties of `` -[%collapsible%open] -===== -`required_capacity`:: -(object) -Contains the required capacity for the policy. -+ -.Properties of `required_capacity` -[%collapsible%open] -====== -`node`:: -(object) -Contains the minimum node sizes required per node, ensuring that individual -shards or ML jobs can fit into a single node. -+ -.Properties of `node` -[%collapsible%open] -======= -`storage`:: -(integer) -Bytes of storage required per node. - -`memory`:: -(integer) -Bytes of memory required per node. - -`processors`:: -(float) -Number of processors (vCPUs) required per node. - -======= -`total`:: -(object) -Contains the total size required for the policy. -+ -.Properties of `total` -[%collapsible%open] -======= -`storage`:: -(integer) -Total bytes of storage required for the policy. - -`memory`:: -(integer) -Total bytes of memory required for the policy. - -`processors`:: -(float) -Total number of processors (vCPUs) required for the policy. - -======= -====== -`current_capacity`:: -(object) -Contains the current capacity for nodes governed by the policy, i.e. the nodes -that {es} bases its calculation on. -+ -.Properties of `current_capacity` -[%collapsible%open] -====== -`node`:: -(object) -Contains the maximum sizes of nodes governed by the policy. -+ -.Properties of `node` -[%collapsible%open] -======= -`storage`:: -(integer) -Maximum bytes of storage of a node. - -`memory`:: -(integer) -Maximum bytes of memory of a node. - -`processors`:: -(float) -Maximum number of processors (vCPUs) of a node. - -======= -`total`:: -(object) -Contains the current total storage and memory sizes for nodes governed by the policy. -+ -.Properties of `total` -[%collapsible%open] -======= -`storage`:: -(integer) -Current bytes of storage available for the policy. - -`memory`:: -(integer) -Current bytes of memory available for the policy. - -`processors`:: -Current number of processors (vCPUs) available for the policy. - -======= -====== -`current_nodes`:: -(array of objects) -List of nodes used for capacity calculation. -+ -.Properties of elements in `current_nodes` -[%collapsible%open] -====== -`name`:: -(string) -Name of the node. -====== -`deciders`:: -(object) -The capacity results from individual deciders, allowing insight into how the -outer level `required_capacity` was calculated. -+ -.Properties of `deciders` -[%collapsible%open] -======= -``:: -(object) -The capacity result for a specific decider enabled for the policy. -+ -.Properties of `` -[%collapsible%open] -======== -`required_capacity`:: -(object) -Required capacity determined by the decider. -+ -.Properties of `required_capacity` -[%collapsible%open] -========= -`node`:: -(object) -Contains the minimum node sizes required per node, ensuring that individual -shards or {ml} jobs can fit into a single node. -+ -.Properties of `node` -[%collapsible%open] -========== -`storage`:: -(integer) -Bytes of storage required per node. - -`memory`:: -(integer) -Bytes of memory required per node. - -`processors`:: -(float) -Number of processors (vCPUs) required per node. 
- -========== -`total`:: -(object) -Contains the total size required for the policy. -+ -.Properties of `total` -[%collapsible%open] -========== -`storage`:: -(integer) -Total bytes of storage required for the policy. - -`memory`:: -(integer) -Total bytes of memory required for the policy. - -`processors`:: -(float) -Total number of processors (vCPUs) required for the policy. - -========== -========= -`reason_summary`:: -(string) -Description of the basis for the decider's result. - -`reason_details`:: -(object) -A per-decider structure containing details about the basis for the deciders' result. -The contents should not be relied on for application purposes and are not subject -to backwards compatibility guarantees. -======== -======= -===== -==== - -[[autoscaling-get-autoscaling-capacity-examples]] -==== {api-examples-title} - -This example retrieves the current autoscaling capacity. - -[source,console] --------------------------------------------------- -GET /_autoscaling/capacity --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - policies: {} -} --------------------------------------------------- diff --git a/docs/reference/autoscaling/apis/get-autoscaling-policy.asciidoc b/docs/reference/autoscaling/apis/get-autoscaling-policy.asciidoc deleted file mode 100644 index 9962b266fb662..0000000000000 --- a/docs/reference/autoscaling/apis/get-autoscaling-policy.asciidoc +++ /dev/null @@ -1,90 +0,0 @@ -[role="xpack"] -[[autoscaling-get-autoscaling-policy]] -=== Get autoscaling policy API -++++ -Get autoscaling policy -++++ - -NOTE: {cloud-only} - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-autoscaling[Autoscaling APIs]. --- - - -Get {cloud}/ec-autoscaling.html[autoscaling] policy. - -[[autoscaling-get-autoscaling-policy-request]] -==== {api-request-title} - -////////////////////////// -[source,console] --------------------------------------------------- -PUT /_autoscaling/policy/my_autoscaling_policy -{ - "roles" : [], - "deciders": { - "fixed": { - } - } -} --------------------------------------------------- -// TESTSETUP - - -[source,console] --------------------------------------------------- -DELETE /_autoscaling/policy/my_autoscaling_policy --------------------------------------------------- -// TEST -// TEARDOWN - -////////////////////////// - -[source,console] --------------------------------------------------- -GET /_autoscaling/policy/ --------------------------------------------------- -// TEST[s//my_autoscaling_policy/] - -[[autoscaling-get-autoscaling-policy-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have -`manage_autoscaling` cluster privileges. For more information, see -<>. - -[[autoscaling-get-autoscaling-policy-desc]] -==== {api-description-title} - -This API gets an autoscaling policy with the provided name. - -[[autoscaling-get-autoscaling-policy-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[autoscaling-get-autoscaling-policy-examples]] -==== {api-examples-title} - -This example gets an autoscaling policy named `my_autoscaling_policy`. 
- -[source,console] --------------------------------------------------- -GET /_autoscaling/policy/my_autoscaling_policy --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "roles": , - "deciders": -} --------------------------------------------------- -// TEST[s//$body.roles/] -// TEST[s//$body.deciders/] diff --git a/docs/reference/autoscaling/apis/put-autoscaling-policy.asciidoc b/docs/reference/autoscaling/apis/put-autoscaling-policy.asciidoc deleted file mode 100644 index 97c6a54fab03a..0000000000000 --- a/docs/reference/autoscaling/apis/put-autoscaling-policy.asciidoc +++ /dev/null @@ -1,102 +0,0 @@ -[role="xpack"] -[[autoscaling-put-autoscaling-policy]] -=== Create or update autoscaling policy API -++++ -Create or update autoscaling policy -++++ - -NOTE: {cloud-only} - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-autoscaling[Autoscaling APIs]. --- - -Creates or updates an {cloud}/ec-autoscaling.html[autoscaling] policy. - -[[autoscaling-put-autoscaling-policy-request]] -==== {api-request-title} - -[source,console] --------------------------------------------------- -PUT /_autoscaling/policy/ -{ - "roles": [], - "deciders": { - "fixed": { - } - } -} --------------------------------------------------- -// TEST[s//name/] - -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_autoscaling/policy/name --------------------------------------------------- -// TEST[continued] - -////////////////////////// - -[[autoscaling-put-autoscaling-policy-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the -`manage_autoscaling` <> to use this -API. - -* If the <> is enabled, only operator -users can use this API. - -[[autoscaling-put-autoscaling-policy-desc]] -==== {api-description-title} - -This API puts an autoscaling policy with the provided name. -See <> for available deciders. - -[[autoscaling-put-autoscaling-policy-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] - -[[autoscaling-put-autoscaling-policy-examples]] -==== {api-examples-title} - -This example puts an autoscaling policy named `my_autoscaling_policy` using the -fixed autoscaling decider, applying to the set of nodes having (only) the -"data_hot" role. 
- -[source,console] --------------------------------------------------- -PUT /_autoscaling/policy/my_autoscaling_policy -{ - "roles" : [ "data_hot" ], - "deciders": { - "fixed": { - } - } -} --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged": true -} --------------------------------------------------- - -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_autoscaling/policy/my_autoscaling_policy --------------------------------------------------- -// TEST[continued] - -////////////////////////// diff --git a/docs/reference/autoscaling/autoscaling-deciders.asciidoc b/docs/reference/autoscaling/autoscaling-deciders.asciidoc deleted file mode 100644 index 7fd6f59647f4d..0000000000000 --- a/docs/reference/autoscaling/autoscaling-deciders.asciidoc +++ /dev/null @@ -1,39 +0,0 @@ -[role="xpack"] -[[autoscaling-deciders]] -== Autoscaling deciders - -<>:: -Estimates required storage capacity of current data set. Available for policies -governing data nodes. - -<>:: -Estimates required storage capacity based on current ingestion into hot nodes. -Available for policies governing hot data nodes. - -<>:: -Estimates required memory capacity based on the number of partially mounted shards. -Available for policies governing frozen data nodes. - -<>:: -Estimates required storage capacity as a percentage of the total data set of -partially mounted indices. -Available for policies governing frozen data nodes. - -<>:: -Estimates a minimum require frozen memory and storage capacity when any index is -in the frozen <> phase. - -<>:: -Estimates required memory capacity based on machine learning jobs. -Available for policies governing machine learning nodes. - -<>:: -Responds with a fixed required capacity. This decider is intended for testing only. - -include::deciders/reactive-storage-decider.asciidoc[] -include::deciders/proactive-storage-decider.asciidoc[] -include::deciders/frozen-shards-decider.asciidoc[] -include::deciders/frozen-storage-decider.asciidoc[] -include::deciders/frozen-existence-decider.asciidoc[] -include::deciders/machine-learning-decider.asciidoc[] -include::deciders/fixed-decider.asciidoc[] diff --git a/docs/reference/autoscaling/deciders/fixed-decider.asciidoc b/docs/reference/autoscaling/deciders/fixed-decider.asciidoc deleted file mode 100644 index 5a8b009d9f063..0000000000000 --- a/docs/reference/autoscaling/deciders/fixed-decider.asciidoc +++ /dev/null @@ -1,71 +0,0 @@ -[role="xpack"] -[[autoscaling-fixed-decider]] -=== Fixed decider - -experimental[] -[WARNING] -The fixed decider is intended for testing only. Do not use this decider in production. - -The {cloud}/ec-autoscaling.html[autoscaling] `fixed` decider responds with a fixed required capacity. It is not enabled -by default but can be enabled for any policy by explicitly configuring it. - -==== Configuration settings - -`storage`:: -(Optional, <>) -Required amount of node-level storage. Defaults to `-1` (disabled). - -`memory`:: -(Optional, <>) -Required amount of node-level memory. Defaults to `-1` (disabled). - -`processors`:: -(Optional, float) -Required number of processors. Defaults to disabled. - -`nodes`:: -(Optional, integer) -Number of nodes to use when calculating capacity. Defaults to `1`. 
- -[[autoscaling-fixed-decider-examples]] -==== {api-examples-title} - -This example puts an autoscaling policy named `my_autoscaling_policy`, enabling -and configuring the fixed decider. - -[source,console] --------------------------------------------------- -PUT /_autoscaling/policy/my_autoscaling_policy -{ - "roles" : [ "data_hot" ], - "deciders": { - "fixed": { - "storage": "1tb", - "memory": "32gb", - "processors": 2.3, - "nodes": 8 - } - } -} --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged": true -} --------------------------------------------------- - -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_autoscaling/policy/my_autoscaling_policy --------------------------------------------------- -// TEST[continued] - -////////////////////////// - diff --git a/docs/reference/autoscaling/deciders/frozen-existence-decider.asciidoc b/docs/reference/autoscaling/deciders/frozen-existence-decider.asciidoc deleted file mode 100644 index 0fc9ad444a213..0000000000000 --- a/docs/reference/autoscaling/deciders/frozen-existence-decider.asciidoc +++ /dev/null @@ -1,9 +0,0 @@ -[role="xpack"] -[[autoscaling-frozen-existence-decider]] -=== Frozen existence decider - -The {cloud}/ec-autoscaling.html[autoscaling] frozen existence decider (`frozen_existence`) ensures that once the first -index enters the frozen ILM phase, the frozen tier is scaled into existence. - -The frozen existence decider is enabled for all policies governing frozen data -nodes and has no configuration options. diff --git a/docs/reference/autoscaling/deciders/frozen-shards-decider.asciidoc b/docs/reference/autoscaling/deciders/frozen-shards-decider.asciidoc deleted file mode 100644 index 1977f95797ef0..0000000000000 --- a/docs/reference/autoscaling/deciders/frozen-shards-decider.asciidoc +++ /dev/null @@ -1,16 +0,0 @@ -[role="xpack"] -[[autoscaling-frozen-shards-decider]] -=== Frozen shards decider - -The {cloud}/ec-autoscaling.html[autoscaling] frozen shards decider (`frozen_shards`) calculates the memory required to search -the current set of partially mounted indices in the frozen tier. Based on a -required memory amount per shard, it calculates the necessary memory in the frozen tier. - -[[autoscaling-frozen-shards-decider-settings]] -==== Configuration settings - -`memory_per_shard`:: -(Optional, <>) -The memory needed per shard, in bytes. Defaults to 2000 shards per 64 GB node (roughly 32 MB per shard). -Notice that this is total memory, not heap, assuming that the Elasticsearch default heap sizing -mechanism is used and that nodes are not bigger than 64 GB. diff --git a/docs/reference/autoscaling/deciders/frozen-storage-decider.asciidoc b/docs/reference/autoscaling/deciders/frozen-storage-decider.asciidoc deleted file mode 100644 index 3a8e7cdb518b3..0000000000000 --- a/docs/reference/autoscaling/deciders/frozen-storage-decider.asciidoc +++ /dev/null @@ -1,19 +0,0 @@ -[role="xpack"] -[[autoscaling-frozen-storage-decider]] -=== Frozen storage decider - -The {cloud}/ec-autoscaling.html[autoscaling] frozen storage decider (`frozen_storage`) calculates the local storage -required to search the current set of partially mounted indices based on a -percentage of the total data set size of such indices. 
It signals that -additional storage capacity is necessary when existing capacity is less than the -percentage multiplied by total data set size. - -The frozen storage decider is enabled for all policies governing frozen data -nodes and has no configuration options. - -[[autoscaling-frozen-storage-decider-settings]] -==== Configuration settings - -`percentage`:: -(Optional, number value) -Percentage of local storage relative to the data set size. Defaults to 5. diff --git a/docs/reference/autoscaling/deciders/machine-learning-decider.asciidoc b/docs/reference/autoscaling/deciders/machine-learning-decider.asciidoc deleted file mode 100644 index 5432d96a47edb..0000000000000 --- a/docs/reference/autoscaling/deciders/machine-learning-decider.asciidoc +++ /dev/null @@ -1,82 +0,0 @@ -[role="xpack"] -[[autoscaling-machine-learning-decider]] -=== Machine learning decider - -The {cloud}/ec-autoscaling.html[autoscaling] {ml} decider (`ml`) calculates the memory and CPU requirements to run {ml} -jobs and trained models. - -The {ml} decider is enabled for policies governing `ml` nodes. - -NOTE: For {ml} jobs to open when the cluster is not appropriately scaled, set -`xpack.ml.max_lazy_ml_nodes` to the largest number of possible {ml} nodes (refer -to <> for more information). In {ess}, this is -automatically set. - -[[autoscaling-machine-learning-decider-settings]] -==== Configuration settings - -Both `num_anomaly_jobs_in_queue` and `num_analytics_jobs_in_queue` are designed -to delay a scale-up event. If the cluster is too small, these settings indicate -how many jobs of each type can be unassigned from a node. Both settings are only -considered for jobs that can be opened given the current scale. If a job is too -large for any node size or if a job can't be assigned without user intervention -(for example, a user calling `_stop` against a real-time {anomaly-job}), the -numbers are ignored for that particular job. - -`num_anomaly_jobs_in_queue`:: -(Optional, integer) -Specifies the number of queued {anomaly-jobs} to allow. Defaults to `0`. - -`num_analytics_jobs_in_queue`:: -(Optional, integer) -Specifies the number of queued {dfanalytics-jobs} to allow. Defaults to `0`. - -`down_scale_delay`:: -(Optional, <>) -Specifies the time to delay before scaling down. Defaults to 1 hour. If a scale -down is possible for the entire time window, then a scale down is requested. If -the cluster requires a scale up during the window, the window is reset. - - -[[autoscaling-machine-learning-decider-examples]] -==== {api-examples-title} - -This example creates an autoscaling policy named `my_autoscaling_policy` that -overrides the default configuration of the {ml} decider. 
- -[source,console] --------------------------------------------------- -PUT /_autoscaling/policy/my_autoscaling_policy -{ - "roles" : [ "ml" ], - "deciders": { - "ml": { - "num_anomaly_jobs_in_queue": 5, - "num_analytics_jobs_in_queue": 3, - "down_scale_delay": "30m" - } - } -} --------------------------------------------------- -// TEST - - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged": true -} --------------------------------------------------- - - -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_autoscaling/policy/my_autoscaling_policy --------------------------------------------------- -// TEST[continued] - -////////////////////////// diff --git a/docs/reference/autoscaling/deciders/proactive-storage-decider.asciidoc b/docs/reference/autoscaling/deciders/proactive-storage-decider.asciidoc deleted file mode 100644 index 33c989f3b12eb..0000000000000 --- a/docs/reference/autoscaling/deciders/proactive-storage-decider.asciidoc +++ /dev/null @@ -1,58 +0,0 @@ -[role="xpack"] -[[autoscaling-proactive-storage-decider]] -=== Proactive storage decider - -The {cloud}/ec-autoscaling.html[autoscaling] proactive storage decider (`proactive_storage`) calculates the storage required to contain -the current data set plus an estimated amount of expected additional data. - -The proactive storage decider is enabled for all policies governing nodes with the `data_hot` role. - -The estimation of expected additional data is based on past indexing that -occurred within the `forecast_window`. -Only indexing into data streams contributes to the estimate. - -[[autoscaling-proactive-storage-decider-settings]] -==== Configuration settings - -`forecast_window`:: -(Optional, <>) -The window of time to use for forecasting. Defaults to 30 minutes. - -[[autoscaling-proactive-storage-decider-examples]] -==== {api-examples-title} - -This example puts an autoscaling policy named `my_autoscaling_policy`, overriding -the proactive decider's `forecast_window` to be 10 minutes. - -[source,console] --------------------------------------------------- -PUT /_autoscaling/policy/my_autoscaling_policy -{ - "roles" : [ "data_hot" ], - "deciders": { - "proactive_storage": { - "forecast_window": "10m" - } - } -} --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged": true -} --------------------------------------------------- - -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_autoscaling/policy/my_autoscaling_policy --------------------------------------------------- -// TEST[continued] - -////////////////////////// diff --git a/docs/reference/autoscaling/deciders/reactive-storage-decider.asciidoc b/docs/reference/autoscaling/deciders/reactive-storage-decider.asciidoc deleted file mode 100644 index 7c38df75169fd..0000000000000 --- a/docs/reference/autoscaling/deciders/reactive-storage-decider.asciidoc +++ /dev/null @@ -1,17 +0,0 @@ -[role="xpack"] -[[autoscaling-reactive-storage-decider]] -=== Reactive storage decider - -The {cloud}/ec-autoscaling.html[autoscaling] reactive storage decider (`reactive_storage`) calculates the storage required to contain -the current data set. 
It signals that additional storage capacity is necessary -when existing capacity has been exceeded (reactively). - -The reactive storage decider is enabled for all policies governing data nodes and has no configuration options. - -The decider relies partially on using <> -allocation rather than node attributes. In particular, scaling a data tier into -existence (starting the first node in a tier) will result in starting a node in -any data tier that is empty if not using allocation based on data tier preference. -Using the <> action to migrate between tiers is the -preferred way of allocating to tiers and fully supports scaling a tier into -existence. diff --git a/docs/reference/autoscaling/index.asciidoc b/docs/reference/autoscaling/index.asciidoc deleted file mode 100644 index e70c464889419..0000000000000 --- a/docs/reference/autoscaling/index.asciidoc +++ /dev/null @@ -1,29 +0,0 @@ -[role="xpack"] -[[xpack-autoscaling]] -= Autoscaling - -NOTE: {cloud-only} - -The {cloud}/ec-autoscaling.html[autoscaling] feature enables an operator to configure tiers of nodes that -self-monitor whether or not they need to scale based on an operator-defined -policy. Then, via the autoscaling API, an Elasticsearch cluster can report -whether or not it needs additional resources to meet the policy. For example, an -operator could define a policy that a warm tier should scale on available disk -space. Elasticsearch would monitor and forecast the available disk space in the -warm tier, and if the forecast is such that the cluster will soon not be able to -allocate existing and future shard copies due to disk space, then the -autoscaling API would report that the cluster needs to scale due to disk space. -It remains the responsibility of the operator to add the additional resources -that the cluster signals it requires. - -A policy is composed of a list of roles and a list of deciders. Nodes matching -the roles are governed by the policy. The deciders provide independent estimates -of the capacity required. See <> for -more information on the deciders available. - -Autoscaling supports the scale-up and scale-down of dedicated {ml} -nodes. Autoscaling also supports the scale-up of data nodes based on storage. -[NOTE] -Autoscaling is not supported on Debian 8. - -include::autoscaling-deciders.asciidoc[] diff --git a/docs/reference/behavioral-analytics/apis/delete-analytics-collection.asciidoc b/docs/reference/behavioral-analytics/apis/delete-analytics-collection.asciidoc deleted file mode 100644 index 1c6c39ea137e9..0000000000000 --- a/docs/reference/behavioral-analytics/apis/delete-analytics-collection.asciidoc +++ /dev/null @@ -1,54 +0,0 @@ -[role="xpack"] -[[delete-analytics-collection]] -=== Delete Analytics Collection - -deprecated:[9.0.0] -beta::[] - -++++ -Delete Analytics Collection -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-analytics[Behavioral analytics APIs]. --- - -Removes a <> Collection and its associated data stream. - -[[delete-analytics-collection-request]] -==== {api-request-title} - -`DELETE _application/analytics/` - -[[delete-analytics-collection-prereq]] -==== {api-prereq-title} - -Requires the `manage_behavioral_analytics` cluster privilege. - -[[delete-analytics-collection-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[[delete-analytics-collection-response-codes]] -==== {api-response-codes-title} - -`400`:: -The `name` was not provided. 
- -`404` (Missing resources):: -No Analytics Collection matching `name` could be found. - -[[delete-analytics-collection-example]] -==== {api-examples-title} - -The following example deletes the Analytics Collection named `my_analytics_collection`: - -[source,console] ----- -DELETE _application/analytics/my_analytics_collection/ ----- -// TEST[skip:Behavioral Analytics APIs emit deprecation warnings and will not be updated] diff --git a/docs/reference/behavioral-analytics/apis/index.asciidoc b/docs/reference/behavioral-analytics/apis/index.asciidoc deleted file mode 100644 index 1fdcd0f1afc9a..0000000000000 --- a/docs/reference/behavioral-analytics/apis/index.asciidoc +++ /dev/null @@ -1,32 +0,0 @@ -[[behavioral-analytics-apis]] -== Behavioral Analytics APIs - -deprecated:[9.0.0] -beta::[] - -++++ -Behavioral Analytics APIs -++++ - ---- - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-analytics[Behavioral analytics APIs]. --- - -Use the following APIs to manage tasks and resources related to <>: - -* <> -* <> -* <> - -Use the following API to ingest events into an Analytics collection: - -* <> - -include::put-analytics-collection.asciidoc[] -include::delete-analytics-collection.asciidoc[] -include::list-analytics-collection.asciidoc[] -include::post-analytics-collection-event.asciidoc[] diff --git a/docs/reference/behavioral-analytics/apis/list-analytics-collection.asciidoc b/docs/reference/behavioral-analytics/apis/list-analytics-collection.asciidoc deleted file mode 100644 index c0892529bb58f..0000000000000 --- a/docs/reference/behavioral-analytics/apis/list-analytics-collection.asciidoc +++ /dev/null @@ -1,120 +0,0 @@ -[role="xpack"] -[[list-analytics-collection]] -=== List Analytics Collections - -deprecated:[9.0.0] -beta::[] - -++++ -List Analytics Collections -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-analytics[Behavioral analytics APIs]. --- - -Returns information about <> Collections. - -[[list-analytics-collection-request]] -==== {api-request-title} - -`GET _application/analytics/` - -[[list-analytics-collection-prereq]] -==== {api-prereq-title} - -Requires the `manage_behavioral_analytics` cluster privilege. - -[[list-analytics-collection-path-params]] -==== {api-path-parms-title} - -``:: -(optional, string) Criteria is used to find a matching analytics collection. -This could be the name of the collection or a pattern to match multiple. -If not specified, will return all analytics collections. - -[[list-analytics-collection-response-codes]] -==== {api-response-codes-title} - -`404`:: -Criteria does not match any Analytics Collections. 
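The autoscaling policy calls documented in the deleted decider pages above can be driven from any HTTP client. A minimal sketch in Python, assuming a local cluster at `http://localhost:9200` with security disabled and the third-party `requests` package installed; the policy body reuses the proactive-storage example removed above.

[source,python]
----
import requests

ES = "http://localhost:9200"  # assumed: local cluster, security disabled

# Create (or overwrite) a policy that overrides the proactive storage
# decider's forecast_window, as in the deleted example above.
policy = {
    "roles": ["data_hot"],
    "deciders": {
        "proactive_storage": {"forecast_window": "10m"}
    },
}
resp = requests.put(f"{ES}/_autoscaling/policy/my_autoscaling_policy", json=policy)
resp.raise_for_status()
print(resp.json())  # expected: {"acknowledged": True}

# Remove the policy again, as in the hidden cleanup snippet above.
requests.delete(f"{ES}/_autoscaling/policy/my_autoscaling_policy").raise_for_status()
----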
- -==== {api-response-codes-title} - -[[list-analytics-collection-example]] -==== {api-examples-title} - -The following example lists all configured Analytics Collections: - -[source,console] ----- -GET _application/analytics/ ----- -// TEST[skip:Behavioral Analytics APIs emit deprecation warnings and will not be updated] - -A sample response: - -[source,console-result] ----- -{ - "my_analytics_collection": { - "event_data_stream": { - "name": "behavioral_analytics-events-my_analytics_collection" - } - }, - "my_analytics_collection2": { - "event_data_stream": { - "name": "behavioral_analytics-events-my_analytics_collection2" - } - } -} ----- - -The following example returns the Analytics Collection that matches `my_analytics_collection`: - -[source,console] ----- -GET _application/analytics/my_analytics_collection ----- -// TEST[skip:Behavioral Analytics APIs emit deprecation warnings and will not be updated] - -A sample response: - -[source,console-result] ----- -{ - "my_analytics_collection": { - "event_data_stream": { - "name": "behavioral_analytics-events-my_analytics_collection" - } - } -} ----- - -The following example returns all Analytics Collections prefixed with `my`: - -[source,console] ----- -GET _application/analytics/my* ----- -// TEST[skip:Behavioral Analytics APIs emit deprecation warnings and will not be updated] - -A sample response: - -[source,console-result] ----- -{ - "my_analytics_collection": { - "event_data_stream": { - "name": "behavioral_analytics-events-my_analytics_collection" - } - }, - "my_analytics_collection2": { - "event_data_stream": { - "name": "behavioral_analytics-events-my_analytics_collection2" - } - } -} ----- diff --git a/docs/reference/behavioral-analytics/apis/post-analytics-collection-event.asciidoc b/docs/reference/behavioral-analytics/apis/post-analytics-collection-event.asciidoc deleted file mode 100644 index aad246872e927..0000000000000 --- a/docs/reference/behavioral-analytics/apis/post-analytics-collection-event.asciidoc +++ /dev/null @@ -1,95 +0,0 @@ -[role="xpack"] -[[post-analytics-collection-event]] -=== Post Event to an Analytics Collection - -deprecated:[9.0.0] -beta::[] - -++++ -Post Analytics Collection Event -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-analytics[Behavioral analytics APIs]. --- - -Post an event to a <> Collection. - -[[post-analytics-collection-event-request]] -==== {api-request-title} - -`POST _application/analytics//event/` - -[[post-analytics-collection-event-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) Analytics collection name you want to ingest event in. - -``:: -(Required, string) Analytics event type. Can be one of `page_view`, `search`, `search_click`. - -[[post-analytics-collection-event-request-body]] -==== {api-request-body-title} - -Full request body parameters can be found in: <>. - -[[post-analytics-collection-event-prereqs]] -==== {api-prereq-title} - -Requires the `post_behavioral_analytics_event` cluster privilege. - -[[post-analytics-collection-event-response-codes]] -==== {api-response-codes-title} - -`202`:: -Event has been accepted and will be ingested. - -`404`:: -Analytics Collection `` does not exists. - -`400`:: -Occurs either when the event type is unknown or when event payload contains invalid data. 
- -[[post-analytics-collection-event-example]] -==== {api-examples-title} - -The following example send a `search_click` event to an Analytics Collection called `my_analytics_collection`: - -[source,console] ----- -POST _application/analytics/my_analytics_collection/event/search_click -{ - "session": { - "id": "1797ca95-91c9-4e2e-b1bd-9c38e6f386a9" - }, - "user": { - "id": "5f26f01a-bbee-4202-9298-81261067abbd" - }, - "search":{ - "query": "search term", - "results": { - "items": [ - { - "document": { - "id": "123", - "index": "products" - } - } - ], - "total_results": 10 - }, - "sort": { - "name": "relevance" - }, - "search_application": "website" - }, - "document":{ - "id": "123", - "index": "products" - } -} ----- -// TEST[skip:Behavioral Analytics APIs emit deprecation warnings and will not be updated] diff --git a/docs/reference/behavioral-analytics/apis/put-analytics-collection.asciidoc b/docs/reference/behavioral-analytics/apis/put-analytics-collection.asciidoc deleted file mode 100644 index 0547630db9543..0000000000000 --- a/docs/reference/behavioral-analytics/apis/put-analytics-collection.asciidoc +++ /dev/null @@ -1,51 +0,0 @@ -[role="xpack"] -[[put-analytics-collection]] -=== Put Analytics Collection - -deprecated:[9.0.0] -beta::[] - -++++ -Put Analytics Collection -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-analytics[Behavioral analytics APIs]. --- - -Creates a <> Collection. - -[[put-analytics-collection-request]] -==== {api-request-title} - -`PUT _application/analytics/` - -[[put-analytics-collection-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[[put-analytics-collection-prereqs]] -==== {api-prereq-title} - -Requires the `manage_behavioral_analytics` cluster privilege. - -[[put-analytics-collection-response-codes]] -==== {api-response-codes-title} - -`400`:: -Analytics Collection `` exists. - -[[put-analytics-collection-example]] -==== {api-examples-title} - -The following example creates a new Analytics Collection called `my_analytics_collection`: - -[source,console] ----- -PUT _application/analytics/my_analytics_collection ----- -// TEST[skip:Behavioral Analytics APIs emit deprecation warnings and will not be updated] diff --git a/docs/reference/cat.asciidoc b/docs/reference/cat.asciidoc deleted file mode 100644 index 61ec9f7680f7f..0000000000000 --- a/docs/reference/cat.asciidoc +++ /dev/null @@ -1,280 +0,0 @@ -[[cat]] -== Compact and aligned text (CAT) APIs - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs]. --- - -["float",id="intro"] -=== Introduction - -JSON is great... for computers. Even if it's pretty-printed, trying -to find relationships in the data is tedious. Human eyes, especially -when looking at a terminal, need compact and aligned text. The compact and -aligned text (CAT) APIs aim to meet this need. - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the -{kibana-ref}/console-kibana.html[Kibana console] or command line. They are _not_ -intended for use by applications. For application consumption, we recommend -using a corresponding JSON API. -==== - -All the cat commands accept a query string parameter `help` to see all -the headers and info they provide, and the `/_cat` command alone lists all -the available commands. 
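The behavioral analytics flow covered by the deleted pages above (create a collection, then post an event to it) looks like this from Python. A sketch only, assuming a local unsecured cluster and the `requests` package, and reusing the `search_click` payload from the example above; note that the deleted pages mark these endpoints as deprecated in 9.0.

[source,python]
----
import requests

ES = "http://localhost:9200"  # assumed: local cluster, security disabled

# Create the collection, then ingest one search_click event into it.
requests.put(f"{ES}/_application/analytics/my_analytics_collection").raise_for_status()

event = {
    "session": {"id": "1797ca95-91c9-4e2e-b1bd-9c38e6f386a9"},
    "user": {"id": "5f26f01a-bbee-4202-9298-81261067abbd"},
    "search": {
        "query": "search term",
        "results": {
            "items": [{"document": {"id": "123", "index": "products"}}],
            "total_results": 10,
        },
        "sort": {"name": "relevance"},
        "search_application": "website",
    },
    "document": {"id": "123", "index": "products"},
}
resp = requests.post(
    f"{ES}/_application/analytics/my_analytics_collection/event/search_click",
    json=event,
)
print(resp.status_code)  # 202 means the event was accepted for ingestion
----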
- -[discrete] -[[common-parameters]] -=== Common parameters - -[discrete] -[[verbose]] -==== Verbose - -Each of the commands accepts a query string parameter `v` to turn on -verbose output. For example: - -[source,console] ----- -GET _cat/master?v=true ----- - -Might respond with: - -[source,txt] ----- -id host ip node -u_n93zwxThWHi1PDBJAGAg 127.0.0.1 127.0.0.1 u_n93zw ----- -// TESTRESPONSE[s/u_n93zw(xThWHi1PDBJAGAg)?/.+/ non_json] - -[discrete] -[[help]] -==== Help - -Each of the commands accepts a query string parameter `help` which will -output its available columns. For example: - -[source,console] ----- -GET _cat/master?help ----- - -Might respond with: - -[source,txt] ----- -id | | node id -host | h | host name -ip | | ip address -node | n | node name ----- -// TESTRESPONSE[s/[|]/[|]/ non_json] - -NOTE: `help` is not supported if any optional url parameter is used. -For example `GET _cat/shards/my-index-000001?help` or `GET _cat/indices/my-index-*?help` -results in an error. Use `GET _cat/shards?help` or `GET _cat/indices?help` -instead. - -[discrete] -[[headers]] -==== Headers - -Each of the commands accepts a query string parameter `h` which forces -only those columns to appear. For example: - -[source,console] ----- -GET _cat/nodes?h=ip,port,heapPercent,name ----- - -Responds with: - -[source,txt] ----- -127.0.0.1 9300 27 sLBaIGK ----- -// TESTRESPONSE[s/9300 27 sLBaIGK/\\d+ \\d+ .+/ non_json] - -You can also request multiple columns using simple wildcards like -`/_cat/thread_pool?h=ip,queue*` to get all headers (or aliases) starting -with `queue`. - -[discrete] -[[numeric-formats]] -==== Numeric formats - -Many commands provide a few types of numeric output, either a byte, size -or a time value. By default, these types are human-formatted, -for example, `3.5mb` instead of `3763212`. The human values are not -sortable numerically, so in order to operate on these values where -order is important, you can change it. - -Say you want to find the largest index in your cluster (storage used -by all the shards, not number of documents). The `/_cat/indices` API -is ideal. You only need to add three things to the API request: - -. The `bytes` query string parameter with a value of `b` to get byte-level resolution. -. The `s` (sort) parameter with a value of `store.size:desc` and a comma with `index:asc` to sort the output -by shard storage descending order and then index name in ascending order. -. The `v` (verbose) parameter to include column headings in the response. - -[source,console] ----- -GET _cat/indices?bytes=b&s=store.size:desc,index:asc&v=true ----- -// TEST[setup:my_index_huge] -// TEST[s/^/PUT my-index-000002\n{"settings": {"number_of_replicas": 0}}\n/] -// TEST[s/s=store\.size:desc,index:asc/s=index:asc/] - -The API returns the following response: - -[source,txt] ----- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size dataset.size -yellow open my-index-000001 u8FNjxh8Rfy_awN11oDKYQ 1 1 1200 0 72171 72171 72171 -green open my-index-000002 nYFWZEO7TUiOjLQXBaYJpA 1 0 0 0 230 230 230 ----- -// TESTRESPONSE[s/72171|230/\\d+/] -// TESTRESPONSE[s/u8FNjxh8Rfy_awN11oDKYQ|nYFWZEO7TUiOjLQXBaYJpA/.+/ non_json] - -If you want to change the <>, use `time` parameter. - -If you want to change the <>, use `size` parameter. - -If you want to change the <>, use `bytes` parameter. 
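The common cat parameters described above (`v`, `help`, `h`, `bytes`, `s`) compose freely. A minimal Python sketch, assuming a local unsecured cluster at `http://localhost:9200` and the `requests` package:

[source,python]
----
import requests

ES = "http://localhost:9200"  # assumed: local cluster, security disabled

# Verbose output with column headings.
print(requests.get(f"{ES}/_cat/master", params={"v": "true"}).text)

# Ask an endpoint which columns it supports.
print(requests.get(f"{ES}/_cat/master", params={"help": "true"}).text)

# Only selected columns.
print(requests.get(f"{ES}/_cat/nodes", params={"h": "ip,port,heapPercent,name"}).text)

# The largest-index query from the text above: byte-level resolution,
# sorted by store size descending, then index name ascending.
print(
    requests.get(
        f"{ES}/_cat/indices",
        params={"bytes": "b", "s": "store.size:desc,index:asc", "v": "true"},
    ).text
)
----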
- -[discrete] -==== Response as text, json, smile, yaml or cbor - -[source,sh] ----- -% curl 'localhost:9200/_cat/indices?format=json&pretty' -[ - { - "pri.store.size": "650b", - "health": "yellow", - "status": "open", - "index": "my-index-000001", - "pri": "5", - "rep": "1", - "docs.count": "0", - "docs.deleted": "0", - "store.size": "650b" - } -] ----- -// NOTCONSOLE - -Currently supported formats (for the `?format=` parameter): -- text (default) -- json -- smile -- yaml -- cbor - -Alternatively you can set the "Accept" HTTP header to the appropriate media format. -All formats above are supported, the GET parameter takes precedence over the header. -For example: - -[source,sh] ----- -% curl '192.168.56.10:9200/_cat/indices?pretty' -H "Accept: application/json" -[ - { - "pri.store.size": "650b", - "health": "yellow", - "status": "open", - "index": "my-index-000001", - "pri": "5", - "rep": "1", - "docs.count": "0", - "docs.deleted": "0", - "store.size": "650b" - } -] ----- -// NOTCONSOLE - -[discrete] -[[sort]] -==== Sort - -Each of the commands accepts a query string parameter `s` which sorts the table by -the columns specified as the parameter value. Columns are specified either by name or by -alias, and are provided as a comma separated string. By default, sorting is done in -ascending fashion. Appending `:desc` to a column will invert the ordering for -that column. `:asc` is also accepted but exhibits the same behavior as the default sort order. - -For example, with a sort string `s=column1,column2:desc,column3`, the table will be -sorted in ascending order by column1, in descending order by column2, and in ascending -order by column3. - -[source,console] ----- -GET _cat/templates?v=true&s=order:desc,index_patterns ----- - -returns: - -[source,txt] ----- -name index_patterns order version -pizza_pepperoni [*pepperoni*] 2 -sushi_california_roll [*avocado*] 1 1 -pizza_hawaiian [*pineapples*] 1 ----- - -include::cat/alias.asciidoc[] - -include::cat/allocation.asciidoc[] - -include::cat/anomaly-detectors.asciidoc[] - -include::cat/component-templates.asciidoc[] - -include::cat/count.asciidoc[] - -include::cat/dataframeanalytics.asciidoc[] - -include::cat/datafeeds.asciidoc[] - -include::cat/fielddata.asciidoc[] - -include::cat/health.asciidoc[] - -include::cat/indices.asciidoc[] - -include::cat/master.asciidoc[] - -include::cat/nodeattrs.asciidoc[] - -include::cat/nodes.asciidoc[] - -include::cat/pending_tasks.asciidoc[] - -include::cat/plugins.asciidoc[] - -include::cat/recovery.asciidoc[] - -include::cat/repositories.asciidoc[] - -include::cat/segments.asciidoc[] - -include::cat/shards.asciidoc[] - -include::cat/snapshots.asciidoc[] - -include::cat/tasks.asciidoc[] - -include::cat/templates.asciidoc[] - -include::cat/thread_pool.asciidoc[] - -include::cat/trainedmodel.asciidoc[] - -include::cat/transforms.asciidoc[] diff --git a/docs/reference/cat/alias.asciidoc b/docs/reference/cat/alias.asciidoc deleted file mode 100644 index 0a7c1828d4876..0000000000000 --- a/docs/reference/cat/alias.asciidoc +++ /dev/null @@ -1,115 +0,0 @@ -[[cat-alias]] -=== cat aliases API -++++ -cat aliases -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or the -{kib} console. They are _not_ intended for use by applications. For application -consumption, use the <>. 
-==== - -Retrieves the cluster's <>, including filter and routing -information. The API does not return <> aliases. - -[[cat-alias-api-request]] -==== {api-request-title} - -`GET _cat/aliases/` - -`GET _cat/aliases` - -[[cat-alias-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the -`view_index_metadata` or `manage` <> -for any alias you retrieve. - -[[cat-alias-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Comma-separated list of aliases to retrieve. Supports -wildcards (`*`). To retrieve all aliases, omit this parameter or use `*` or -`_all`. - -[[cat-alias-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=expand-wildcards] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[cat-alias-api-example]] -==== {api-examples-title} - -//// -Hidden setup for example: -[source,console,id=cat-aliases-example] ----- -PUT test1 -{ - "aliases": { - "alias1": {}, - "alias2": { - "filter": { - "match": { - "user.id": "kimchy" - } - } - }, - "alias3": { - "routing": "1" - }, - "alias4": { - "index_routing": "2", - "search_routing": "1,2" - } - } -} ----- -//// - -[source,console] ----- -GET _cat/aliases?v=true ----- -// TEST[continued] - -The API returns the following response: - -[source,txt] ----- -alias index filter routing.index routing.search is_write_index -alias1 test1 - - - - -alias2 test1 * - - - -alias3 test1 - 1 1 - -alias4 test1 - 2 1,2 - ----- -// TESTRESPONSE[s/[*]/[*]/ non_json] - -This response shows that `alias2` has configured a filter, and specific routing -configurations in `alias3` and `alias4`. - -If you only want to get information about specific aliases, you can specify -the aliases in comma-delimited format as a URL parameter, e.g., -/_cat/aliases/alias1,alias2. diff --git a/docs/reference/cat/allocation.asciidoc b/docs/reference/cat/allocation.asciidoc deleted file mode 100644 index 34b8069b91e27..0000000000000 --- a/docs/reference/cat/allocation.asciidoc +++ /dev/null @@ -1,141 +0,0 @@ -[[cat-allocation]] -=== cat allocation API -++++ -cat allocation -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Provides a snapshot of the number of shards <> to each data node -and their disk space. - - -[[cat-allocation-api-request]] -==== {api-request-title} - -`GET /_cat/allocation/` - -`GET /_cat/allocation` - -[[cat-allocation-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. 
- -[[cat-allocation-api-path-params]] -==== {api-path-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=node-id] - -[[cat-allocation-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bytes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=local] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - -[[cat-allocation-api-response-body]] -==== {api-response-body-title} - -`shards`:: -Number of primary and replica shards assigned to the node. - -`shards.undesired`:: -Amount of shards that are scheduled to be moved elsewhere in the cluster -or -1 other than desired balance allocator is used - -`write_load.forecast`:: -Sum of index write load forecasts - -`disk.indices.forecast`:: -Sum of shard size forecasts - -`disk.indices`:: -Disk space used by the node's shards. Does not include disk space for the -<> or unassigned shards. -+ -IMPORTANT: This metric double-counts disk space for hard-linked files, such as -those created when <>, -<>, or <> an index. - -`disk.used`:: -+ --- -Total disk space in use. {es} retrieves this metric from the node's operating -system (OS). The metric includes disk space for: - -- {es}, including the <> and unassigned shards -- The node's OS -- Any other applications or files on the node - -Unlike `disk.indices`, this metric does not double-count disk space for -hard-linked files. --- - -`disk.avail`:: -Free disk space available to {es}. {es} retrieves this metric from the node's -OS. <> uses this metric to assign -shards to nodes based on available disk space. - -`disk.total`:: -Total disk space for the node, including in-use and available space. - -`disk.percent`:: -Total percentage of disk space in use. Calculated as `disk.used` / `disk.total`. - -`host`:: -Network host for the node. Set using <>. - -`ip`:: -IP address and port for the node. - -`node`:: -Name for the node. Set using <>. - -`node.role`, `r`, `role`, `nodeRole`:: -Node roles - -[[cat-allocation-api-example]] -==== {api-examples-title} - -[source,console,id=cat-allocation-example] --------------------------------------------------- -GET /_cat/allocation?v=true --------------------------------------------------- -// TEST[s/^/PUT test\n{"settings": {"number_of_replicas": 0}}\n/] - -The API returns the following response: - -[source,txt] --------------------------------------------------- -shards shards.undesired write_load.forecast disk.indices.forecast disk.indices disk.used disk.avail disk.total disk.percent host ip node node.role - 1 0 0.0 260b 260b 47.3gb 43.4gb 100.7gb 46 127.0.0.1 127.0.0.1 CSUXak2 himrst --------------------------------------------------- -// TESTRESPONSE[s/\d+(\.\d+)?[tgmk]?b/\\d+(\\.\\d+)?[tgmk]?b/ s/46/\\d+/] -// TESTRESPONSE[s/CSUXak2 himrst/.+/ non_json] - -This response shows a single shard is allocated to the one node available. 
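Because the cat APIs also accept `format=json` (see the response-format section above), the allocation table is easy to post-process. A sketch assuming a local unsecured cluster and the `requests` package; the JSON keys are assumed to mirror the column names listed above.

[source,python]
----
import requests

ES = "http://localhost:9200"  # assumed: local cluster, security disabled

# One row per data node (plus an UNASSIGNED row if any shards are unallocated).
rows = requests.get(
    f"{ES}/_cat/allocation", params={"format": "json", "bytes": "b"}
).json()
for row in rows:
    print(row.get("node"), "shards:", row.get("shards"),
          "disk.percent:", row.get("disk.percent"))
----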
diff --git a/docs/reference/cat/anomaly-detectors.asciidoc b/docs/reference/cat/anomaly-detectors.asciidoc deleted file mode 100644 index 03cd824092cf6..0000000000000 --- a/docs/reference/cat/anomaly-detectors.asciidoc +++ /dev/null @@ -1,300 +0,0 @@ -[role="xpack"] -[[cat-anomaly-detectors]] -=== cat anomaly detectors API -++++ -cat anomaly detectors -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the -<>. -==== - -Returns configuration and usage information about {ml-docs}/ml-ad-overview.html[{anomaly-jobs}]. - -[[cat-anomaly-detectors-request]] -==== {api-request-title} - -`GET /_cat/ml/anomaly_detectors/` + - -`GET /_cat/ml/anomaly_detectors` - -[[cat-anomaly-detectors-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `monitor_ml`, -`monitor`, `manage_ml`, or `manage` cluster privileges to use this API. See -<> and {ml-docs-setup-privileges}. - - -[[cat-anomaly-detectors-desc]] -==== {api-description-title} - -NOTE: This API returns a maximum of 10,000 jobs. - -For more information about {anomaly-detect}, see -{ml-docs}/ml-ad-finding-anomalies.html[Finding anomalies]. - -[[cat-anomaly-detectors-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] - -[[cat-anomaly-detectors-query-params]] -==== {api-query-parms-title} - -`allow_no_match`:: -(Optional, Boolean) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=allow-no-match-jobs] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bytes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] -+ -If you do not specify which columns to include, the API returns the default -columns. If you explicitly specify one or more columns, it returns only the -specified columns. 
-+ -Valid columns are: - -`assignment_explanation`, `ae`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=assignment-explanation-anomaly-jobs] - -`buckets.count`, `bc`, `bucketsCount`::: -(Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=bucket-count-anomaly-jobs] - -`buckets.time.exp_avg`, `btea`, `bucketsTimeExpAvg`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average] - -`buckets.time.exp_avg_hour`, `bteah`, `bucketsTimeExpAvgHour`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average-hour] - -`buckets.time.max`, `btmax`, `bucketsTimeMax`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=bucket-time-maximum] - -`buckets.time.min`, `btmin`, `bucketsTimeMin`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=bucket-time-minimum] - -`buckets.time.total`, `btt`, `bucketsTimeTotal`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=bucket-time-total] - -`data.buckets`, `db`, `dataBuckets`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=bucket-count] - -`data.earliest_record`, `der`, `dataEarliestRecord`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=earliest-record-timestamp] - -`data.empty_buckets`, `deb`, `dataEmptyBuckets`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=empty-bucket-count] - -`data.input_bytes`, `dib`, `dataInputBytes`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=input-bytes] - -`data.input_fields`, `dif`, `dataInputFields`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=input-field-count] - -`data.input_records`, `dir`, `dataInputRecords`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=input-record-count] - -`data.invalid_dates`, `did`, `dataInvalidDates`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=invalid-date-count] - -`data.last`, `dl`, `dataLast`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=last-data-time] - -`data.last_empty_bucket`, `dleb`, `dataLastEmptyBucket`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=latest-empty-bucket-timestamp] - -`data.last_sparse_bucket`, `dlsb`, `dataLastSparseBucket`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=latest-sparse-record-timestamp] - -`data.latest_record`, `dlr`, `dataLatestRecord`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=latest-record-timestamp] - -`data.missing_fields`, `dmf`, `dataMissingFields`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=missing-field-count] - -`data.out_of_order_timestamps`, `doot`, `dataOutOfOrderTimestamps`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=out-of-order-timestamp-count] - -`data.processed_fields`, `dpf`, `dataProcessedFields`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=processed-field-count] - -`data.processed_records`, `dpr`, `dataProcessedRecords`::: -(Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=processed-record-count] - -`data.sparse_buckets`, `dsb`, `dataSparseBuckets`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=sparse-bucket-count] - -`forecasts.memory.avg`, `fmavg`, `forecastsMemoryAvg`::: -The average memory usage in bytes for forecasts related to the {anomaly-job}. - -`forecasts.memory.max`, `fmmax`, `forecastsMemoryMax`::: -The maximum memory usage in bytes for forecasts related to the {anomaly-job}. - -`forecasts.memory.min`, `fmmin`, `forecastsMemoryMin`::: -The minimum memory usage in bytes for forecasts related to the {anomaly-job}. - -`forecasts.memory.total`, `fmt`, `forecastsMemoryTotal`::: -The total memory usage in bytes for forecasts related to the {anomaly-job}. 
- -`forecasts.records.avg`, `fravg`, `forecastsRecordsAvg`::: -The average number of `model_forecast` documents written for forecasts related -to the {anomaly-job}. - -`forecasts.records.max`, `frmax`, `forecastsRecordsMax`::: -The maximum number of `model_forecast` documents written for forecasts related -to the {anomaly-job}. - -`forecasts.records.min`, `frmin`, `forecastsRecordsMin`::: -The minimum number of `model_forecast` documents written for forecasts related -to the {anomaly-job}. - -`forecasts.records.total`, `frt`, `forecastsRecordsTotal`::: -The total number of `model_forecast` documents written for forecasts related to -the {anomaly-job}. - -`forecasts.time.avg`, `ftavg`, `forecastsTimeAvg`::: -The average runtime in milliseconds for forecasts related to the {anomaly-job}. - -`forecasts.time.max`, `ftmax`, `forecastsTimeMax`::: -The maximum runtime in milliseconds for forecasts related to the {anomaly-job}. - -`forecasts.time.min`, `ftmin`, `forecastsTimeMin`::: -The minimum runtime in milliseconds for forecasts related to the {anomaly-job}. - -`forecasts.time.total`, `ftt`, `forecastsTimeTotal`::: -The total runtime in milliseconds for forecasts related to the {anomaly-job}. - -`forecasts.total`, `ft`, `forecastsTotal`::: -(Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=forecast-total] - -`id`::: -(Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] - -`model.bucket_allocation_failures`, `mbaf`, `modelBucketAllocationFailures`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=bucket-allocation-failures-count] - -`model.by_fields`, `mbf`, `modelByFields`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=total-by-field-count] - -`model.bytes`, `mb`, `modelBytes`::: -(Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-bytes] - -`model.bytes_exceeded`, `mbe`, `modelBytesExceeded`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-bytes-exceeded] - -`model.categorization_status`, `mcs`, `modelCategorizationStatus`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=categorization-status] - -`model.categorized_doc_count`, `mcdc`, `modelCategorizedDocCount`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=categorized-doc-count] - -`model.dead_category_count`, `mdcc`, `modelDeadCategoryCount`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=dead-category-count] - -`model.failed_category_count`, `mdcc`, `modelFailedCategoryCount`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=failed-category-count] - -`model.frequent_category_count`, `mfcc`, `modelFrequentCategoryCount`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=frequent-category-count] - -`model.log_time`, `mlt`, `modelLogTime`::: -The timestamp when the model stats were gathered, according to server time. 
- -`model.memory_limit`, `mml`, `modelMemoryLimit`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-memory-limit-anomaly-jobs] - -`model.memory_status`, `mms`, `modelMemoryStatus`::: -(Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-memory-status] - -`model.output_memory_allocator_bytes`, `momab`, `modelOutputMemoryAllocatorBytes`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=output-memory-allocator-bytes] - -`model.over_fields`, `mof`, `modelOverFields`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=total-over-field-count] - -`model.partition_fields`, `mpf`, `modelPartitionFields`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=total-partition-field-count] - -`model.rare_category_count`, `mrcc`, `modelRareCategoryCount`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=rare-category-count] - -`model.timestamp`, `mt`, `modelTimestamp`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-timestamp] - -`model.total_category_count`, `mtcc`, `modelTotalCategoryCount`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=total-category-count] - -`node.address`, `na`, `nodeAddress`::: -The network address of the node. -+ -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=node-jobs] - -`node.ephemeral_id`, `ne`, `nodeEphemeralId`::: -The ephemeral ID of the node. -+ -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=node-jobs] - -`node.id`, `ni`, `nodeId`::: -The unique identifier of the node. -+ -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=node-jobs] - -`node.name`, `nn`, `nodeName`::: -The node name. -+ -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=node-jobs] - -`opened_time`, `ot`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=open-time] - -`state`, `s`::: -(Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=state-anomaly-job] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - -[[cat-anomaly-detectors-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET _cat/ml/anomaly_detectors?h=id,s,dpr,mb&v=true --------------------------------------------------- -// TEST[skip:kibana sample data] - -[source,console-result] ----- -id s dpr mb -high_sum_total_sales closed 14022 1.5mb -low_request_rate closed 1216 40.5kb -response_code_rates closed 28146 132.7kb -url_scanning closed 28146 501.6kb ----- -// TESTRESPONSE[skip:kibana sample data] diff --git a/docs/reference/cat/component-templates.asciidoc b/docs/reference/cat/component-templates.asciidoc deleted file mode 100644 index 8be1096a215f3..0000000000000 --- a/docs/reference/cat/component-templates.asciidoc +++ /dev/null @@ -1,136 +0,0 @@ -[[cat-component-templates]] -=== cat component templates API -++++ -cat component templates -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns information about <> in -a cluster. Component templates are building blocks for constructing -<> that specify index <>, -<>, and <>. 
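The anomaly-detector listing from the example above can also be requested as JSON so each job arrives as a dictionary. A sketch assuming a local unsecured cluster with {ml} jobs configured and the `requests` package.

[source,python]
----
import requests

ES = "http://localhost:9200"  # assumed: local cluster, security disabled

# Same columns as the deleted example: id, state, processed records, model bytes.
jobs = requests.get(
    f"{ES}/_cat/ml/anomaly_detectors",
    params={"h": "id,s,dpr,mb", "format": "json"},
).json()
for job in jobs:
    print(job)
----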
- - -[[cat-component-templates-api-request]] -==== {api-request-title} - -`GET /_cat/component_templates/` - -`GET /_cat/component_templates` - -[[cat-component-templates-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cat-component-templates-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) The name of the component template to return. Accepts -wildcard expressions. If omitted, all component templates are returned. - -[[cat-component-templates-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=local] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-component-templates-api-example]] -==== {api-examples-title} - -//// -[source,console] ----- -PUT _component_template/my-template-1 -{ - "template": { - "settings": { - "number_of_shards": 1 - } - } -} - -PUT _component_template/my-template-2 -{ - "template": { - "mappings": { - "_source": { - "enabled": false - }, - "properties": { - "host_name": { - "type": "keyword" - }, - "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" - } - } - } - } -} - -PUT _index_template/my-index-template -{ - "index_patterns": [ - "my-index*" - ], - "composed_of": [ - "my-template-1", - "my-template-2" - ] -} ----- -//// - -[source,console] ----- -GET _cat/component_templates/my-template-*?v=true&s=name ----- -// TEST[continued] - -The API returns the following response: - -[source,txt] ----- -name version alias_count mapping_count settings_count metadata_count included_in -my-template-1 0 0 1 0 [my-index-template] -my-template-2 0 3 0 0 [my-index-template] ----- -// TESTRESPONSE[s/\*/\\*/ s/\[/\\[/ s/\]/\\]/ non_json] - -//// -[source,console] ----- -DELETE _index_template/my-index-template -DELETE _component_template/my-template-1 -DELETE _component_template/my-template-2 ----- -// TEST[continued] -//// diff --git a/docs/reference/cat/count.asciidoc b/docs/reference/cat/count.asciidoc deleted file mode 100644 index 7adcd1464dab1..0000000000000 --- a/docs/reference/cat/count.asciidoc +++ /dev/null @@ -1,108 +0,0 @@ -[[cat-count]] -=== cat count API -++++ -cat count -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Provides quick access to a document count for a data stream, an index, or an -entire cluster. - -NOTE: The document count only includes live documents, not deleted documents -which have not yet been removed by the merge process. - - -[[cat-count-api-request]] -==== {api-request-title} - -`GET /_cat/count/` - -`GET /_cat/count` - -[[cat-count-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `read` -<> for any data stream, index, or alias -you retrieve. 
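A Python sketch of the component-template example concluded above: it recreates a trimmed version of the hidden setup, then lists the templates through the cat endpoint. Assumes a local unsecured cluster and the `requests` package.

[source,python]
----
import requests

ES = "http://localhost:9200"  # assumed: local cluster, security disabled

requests.put(
    f"{ES}/_component_template/my-template-1",
    json={"template": {"settings": {"number_of_shards": 1}}},
).raise_for_status()

requests.put(
    f"{ES}/_component_template/my-template-2",
    json={
        "template": {
            "mappings": {
                "properties": {
                    "host_name": {"type": "keyword"},
                    "created_at": {"type": "date", "format": "EEE MMM dd HH:mm:ss Z yyyy"},
                }
            }
        }
    },
).raise_for_status()

# List the templates, sorted by name, as in the deleted cat example.
print(
    requests.get(
        f"{ES}/_cat/component_templates/my-template-*",
        params={"v": "true", "s": "name"},
    ).text
)
----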
- -[[cat-count-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Comma-separated list of data streams, indices, and aliases -used to limit the request. Supports wildcards (`*`). To target all data streams -and indices, omit this parameter or use `*` or `_all`. - -[[cat-count-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-count-api-example]] -==== {api-examples-title} - -[[cat-count-api-example-ind]] -===== Example with an individual data stream or index - -The following `count` API request retrieves the document count for the -`my-index-000001` data stream or index. - -[source,console,id=cat-count-individual-example] --------------------------------------------------- -GET /_cat/count/my-index-000001?v=true --------------------------------------------------- -// TEST[setup:my_index_big] - - -The API returns the following response: - -[source,txt] --------------------------------------------------- -epoch timestamp count -1475868259 15:24:20 120 --------------------------------------------------- -// TESTRESPONSE[s/1475868259 15:24:20/\\d+ \\d+:\\d+:\\d+/ non_json] - -[[cat-count-api-example-all]] -===== Example with all data streams and indices in a cluster - -The following `count` API request retrieves the document count for all data -streams and indices in the cluster. - -[source,console,id=cat-count-all-example] --------------------------------------------------- -GET /_cat/count?v=true --------------------------------------------------- -// TEST[setup:my_index_big] -// TEST[s/^/POST test\/_doc\?refresh\n{"test": "test"}\n/] - -The API returns the following response: - -[source,txt] --------------------------------------------------- -epoch timestamp count -1475868259 15:24:20 121 --------------------------------------------------- -// TESTRESPONSE[s/1475868259 15:24:20/\\d+ \\d+:\\d+:\\d+/ non_json] diff --git a/docs/reference/cat/datafeeds.asciidoc b/docs/reference/cat/datafeeds.asciidoc deleted file mode 100644 index 29f5bc8150af1..0000000000000 --- a/docs/reference/cat/datafeeds.asciidoc +++ /dev/null @@ -1,143 +0,0 @@ -[role="xpack"] -[[cat-datafeeds]] -=== cat {dfeeds} API -++++ -cat {dfeeds} -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns configuration and usage information about {ml-docs}/ml-ad-run-jobs.html#ml-ad-datafeeds[{dfeeds}]. - -[[cat-datafeeds-request]] -==== {api-request-title} - -`GET /_cat/ml/datafeeds/` + - -`GET /_cat/ml/datafeeds` - -[[cat-datafeeds-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `monitor_ml`, -`monitor`, `manage_ml`, or `manage` cluster privileges to use this API. See -<> and {ml-docs-setup-privileges}. - - -[[cat-datafeeds-desc]] -==== {api-description-title} - -{dfeeds-cap} retrieve data from {es} for analysis by {anomaly-jobs}. For more -information, see {ml-docs}/ml-dfeeds.html[{dfeeds-cap}]. 
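The two cat count requests documented above, issued from Python. A sketch assuming a local unsecured cluster, the `requests` package, and that an index named `my-index-000001` (the placeholder from the deleted example) exists.

[source,python]
----
import requests

ES = "http://localhost:9200"  # assumed: local cluster, security disabled

# Live-document count for one index, then for the whole cluster.
print(requests.get(f"{ES}/_cat/count/my-index-000001", params={"v": "true"}).text)
print(requests.get(f"{ES}/_cat/count", params={"v": "true"}).text)
----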
- -NOTE: This API returns a maximum of 10,000 jobs. - -[[cat-datafeeds-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=datafeed-id] - -[[cat-datafeeds-query-params]] -==== {api-query-parms-title} - -`allow_no_match`:: -(Optional, Boolean) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=allow-no-match-datafeeds] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] -+ -If you do not specify which columns to include, the API returns the default -columns. If you explicitly specify one or more columns, it returns only the -specified columns. -+ -Valid columns are: - -`assignment_explanation`, `ae`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=assignment-explanation-datafeeds] - -`buckets.count`, `bc`, `bucketsCount`::: -(Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=bucket-count] - -`id`::: -(Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=datafeed-id] - -`node.address`, `na`, `nodeAddress`::: -The network address of the node. -+ -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=node-datafeeds] - -`node.ephemeral_id`, `ne`, `nodeEphemeralId`::: -The ephemeral ID of the node. -+ -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=node-datafeeds] - -`node.id`, `ni`, `nodeId`::: -The unique identifier of the node. -+ -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=node-datafeeds] - -`node.name`, `nn`, `nodeName`::: -The node name. -+ -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=node-datafeeds] - -`search.bucket_avg`, `sba`, `searchBucketAvg`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=search-bucket-avg] - -`search.count`, `sc`, `searchCount`::: -(Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=search-count] - -`search.exp_avg_hour`, `seah`, `searchExpAvgHour`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=search-exp-avg-hour] - -`search.time`, `st`, `searchTime`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=search-time] - -`state`, `s`::: -(Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=state-datafeed] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - -[[cat-datafeeds-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET _cat/ml/datafeeds?v=true --------------------------------------------------- -// TEST[skip:kibana sample data] - -[source,console-result] ----- -id state buckets.count search.count -datafeed-high_sum_total_sales stopped 743 7 -datafeed-low_request_rate stopped 1457 3 -datafeed-response_code_rates stopped 1460 18 -datafeed-url_scanning stopped 1460 18 ----- -// TESTRESPONSE[skip:kibana sample data] diff --git a/docs/reference/cat/dataframeanalytics.asciidoc b/docs/reference/cat/dataframeanalytics.asciidoc deleted file mode 100644 index f00a9826ee5bd..0000000000000 --- a/docs/reference/cat/dataframeanalytics.asciidoc +++ /dev/null @@ -1,151 +0,0 @@ -[role="xpack"] -[[cat-dfanalytics]] -=== cat {dfanalytics} API -++++ -cat {dfanalytics} -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. 
--- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the -<>. -==== - -Returns configuration and usage information about {ml-docs}/ml-dfanalytics.html[{dfanalytics-jobs}]. - - -[[cat-dfanalytics-request]] -==== {api-request-title} - -`GET /_cat/ml/data_frame/analytics/` + - -`GET /_cat/ml/data_frame/analytics` - - -[[cat-dfanalytics-prereqs]] -==== {api-prereq-title} - -If the {es} {security-features} are enabled, you must have the following -privileges: - -* cluster: `monitor_ml` - -For more information, see <> and {ml-docs-setup-privileges}. - - -//// -[[cat-dfanalytics-desc]] -==== {api-description-title} -//// - - -[[cat-dfanalytics-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=job-id-data-frame-analytics-default] - - -[[cat-dfanalytics-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] -+ -If you do not specify which columns to include, the API returns the default -columns. If you explicitly specify one or more columns, it returns only the -specified columns. -+ -Valid columns are: - -`assignment_explanation`, `ae`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=assignment-explanation-dfanalytics] - -`create_time`, `ct`, `createTime`::: -(Default) -The time when the {dfanalytics-job} was created. - -`description`, `d`::: -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=description-dfa] - -`dest_index`, `di`, `destIndex`::: -Name of the destination index. - -`failure_reason`, `fr`, `failureReason`::: -Contains messages about the reason why a {dfanalytics-job} failed. - -`id`::: -(Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=job-id-data-frame-analytics] - -`model_memory_limit`, `mml`, `modelMemoryLimit`::: -The approximate maximum amount of memory resources that are permitted for the -{dfanalytics-job}. - -`node.address`, `na`, `nodeAddress`::: -The network address of the node that the {dfanalytics-job} is assigned to. - -`node.ephemeral_id`, `ne`, `nodeEphemeralId`::: -The ephemeral ID of the node that the {dfanalytics-job} is assigned to. - -`node.id`, `ni`, `nodeId`::: -The unique identifier of the node that the {dfanalytics-job} is assigned to. - -`node.name`, `nn`, `nodeName`::: -The name of the node that the {dfanalytics-job} is assigned to. - -`progress`, `p`::: -The progress report of the {dfanalytics-job} by phase. - -`source_index`, `si`, `sourceIndex`::: -Name of the source index. - -`state`, `s`::: -(Default) -Current state of the {dfanalytics-job}. - -`type`, `t`::: -(Default) -The type of analysis that the {dfanalytics-job} performs. - -`version`, `v`::: -The {es} version number in which the {dfanalytics-job} was created. 
- -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-dfanalytics-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET _cat/ml/data_frame/analytics?v=true --------------------------------------------------- -// TEST[skip:kibana sample data] - -[source,console-result] ----- -id create_time type state -classifier_job_1 2020-02-12T11:49:09.594Z classification stopped -classifier_job_2 2020-02-12T11:49:14.479Z classification stopped -classifier_job_3 2020-02-12T11:49:16.928Z classification stopped -classifier_job_4 2020-02-12T11:49:19.127Z classification stopped -classifier_job_5 2020-02-12T11:49:21.349Z classification stopped ----- -// TESTRESPONSE[skip:kibana sample data] diff --git a/docs/reference/cat/fielddata.asciidoc b/docs/reference/cat/fielddata.asciidoc deleted file mode 100644 index f11e40263ec2b..0000000000000 --- a/docs/reference/cat/fielddata.asciidoc +++ /dev/null @@ -1,173 +0,0 @@ -[[cat-fielddata]] -=== cat fielddata API -++++ -cat fielddata -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns the amount of heap memory currently used by the -<> on every data node in the cluster. - - -[[cat-fielddata-api-request]] -==== {api-request-title} - -`GET /_cat/fielddata/` - -`GET /_cat/fielddata` - -[[cat-fielddata-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cat-fielddata-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Comma-separated list of fields used to limit returned -information. - - -[[cat-fielddata-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bytes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-fielddata-api-example]] -==== {api-examples-title} - -//// -Hidden setup snippet to build an index with fielddata so our results are real: - -[source,console,id=cat-fielddata-example] --------------------------------------------------- -PUT test -{ - "mappings": { - "properties": { - "body": { - "type": "text", - "fielddata":true - }, - "soul": { - "type": "text", - "fielddata":true - }, - "mind": { - "type": "text", - "fielddata":true - } - } - } -} -POST test/_doc?refresh -{ - "body": "some words so there is a little field data", - "soul": "some more words", - "mind": "even more words" -} - -# Perform a search to load the field data -POST test/_search?sort=body,soul,mind --------------------------------------------------- -//// - -[[cat-fielddata-api-example-ind]] -===== Example with an individual field - -You can specify an individual field in the request body or URL path. 
The -following `fieldata` API request retrieves heap memory size information for the -`body` field. - -[source,console] --------------------------------------------------- -GET /_cat/fielddata?v=true&fields=body --------------------------------------------------- -// TEST[continued] - -The API returns the following response: - -[source,txt] --------------------------------------------------- -id host ip node field size -Nqk-6inXQq-OxUfOUI8jNQ 127.0.0.1 127.0.0.1 Nqk-6in body 544b --------------------------------------------------- -// TESTRESPONSE[s/544b|480b/\\d+(\\.\\d+)?[tgmk]?b/] -// TESTRESPONSE[s/Nqk-6in[^ ]*/.+/ non_json] - -[[cat-fielddata-api-example-list]] -===== Example with a list of fields - -You can specify a comma-separated list of fields in the request body or URL -path. The following `fieldata` API request retrieves heap memory size -information for the `body` and `soul` fields. - - -[source,console] --------------------------------------------------- -GET /_cat/fielddata/body,soul?v=true --------------------------------------------------- -// TEST[continued] - -The API returns the following response: - -[source,txt] --------------------------------------------------- -id host ip node field size -Nqk-6inXQq-OxUfOUI8jNQ 127.0.0.1 127.0.0.1 Nqk-6in body 544b -Nqk-6inXQq-OxUfOUI8jNQ 127.0.0.1 127.0.0.1 Nqk-6in soul 480b --------------------------------------------------- -// TESTRESPONSE[s/544b|480b/\\d+(\\.\\d+)?[tgmk]?b/] -// TESTRESPONSE[s/Nqk-6in[^ ]*/.+/ s/soul|body/\\w+/ non_json] - -The response shows the individual fielddata for the `body` and `soul` fields, -one row per field per node. - -[[cat-fielddata-api-example-all]] -===== Example with all fields in a cluster - -The following `fieldata` API request retrieves heap memory size -information all fields. - -[source,console] --------------------------------------------------- -GET /_cat/fielddata?v=true --------------------------------------------------- -// TEST[continued] - -The API returns the following response: - -[source,txt] --------------------------------------------------- -id host ip node field size -Nqk-6inXQq-OxUfOUI8jNQ 127.0.0.1 127.0.0.1 Nqk-6in body 544b -Nqk-6inXQq-OxUfOUI8jNQ 127.0.0.1 127.0.0.1 Nqk-6in mind 360b -Nqk-6inXQq-OxUfOUI8jNQ 127.0.0.1 127.0.0.1 Nqk-6in soul 480b --------------------------------------------------- -// TESTRESPONSE[s/544b|480b|360b/\\d+(\\.\\d+)?[tgmk]?b/] -// TESTRESPONSE[s/Nqk-6in[^ ]*/.+/ s/soul|body|mind/\\w+/ non_json] \ No newline at end of file diff --git a/docs/reference/cat/health.asciidoc b/docs/reference/cat/health.asciidoc deleted file mode 100644 index 7ffc170ec8515..0000000000000 --- a/docs/reference/cat/health.asciidoc +++ /dev/null @@ -1,160 +0,0 @@ -[[cat-health]] -=== cat health API -++++ -cat health -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns the health status of a cluster, similar to the <> API. - - -[[cat-health-api-request]] -==== {api-request-title} - -`GET /_cat/health` - -[[cat-health-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. 
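The three fielddata lookups from the deleted examples above (one field, a field list, all fields), issued from Python. A sketch assuming a local unsecured cluster, the `requests` package, and that fielddata has already been loaded for the `body` and `soul` fields as in the hidden setup.

[source,python]
----
import requests

ES = "http://localhost:9200"  # assumed: local cluster, security disabled

# Heap used by fielddata, per field and node: one field, a field list,
# then every loaded field, mirroring the three deleted examples.
for fields in ("body", "body,soul", None):
    params = {"v": "true"}
    if fields:
        params["fields"] = fields
    print(requests.get(f"{ES}/_cat/fielddata", params=params).text)
----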
- -[[cat-health-api-desc]] -==== {api-description-title} - -You can use the cat health API to get the health status of a cluster. - -[[timestamp]] -This API is often used to check malfunctioning clusters. To help you -track cluster health alongside log files and alerting systems, the API returns -timestamps in two formats: - -* `HH:MM:SS`, which is human-readable but includes no date information. -* {wikipedia}/Unix_time[Unix `epoch` time], which is -machine-sortable and includes date information. This is useful for cluster -recoveries that take multiple days. - -You can use the cat health API to verify cluster health across multiple nodes. -See <>. - -You also can use the API to track the recovery of a large cluster -over a longer period of time. See <>. - - -[[cat-health-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -`ts` (timestamps):: -(Optional, Boolean) If `true`, returns `HH:MM:SS` and -{wikipedia}/Unix_time[Unix `epoch`] timestamps. Defaults to -`true`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-health-api-example]] -==== {api-examples-title} - -[[cat-health-api-example-timestamp]] -===== Example with a timestamp -By default, the cat health API returns `HH:MM:SS` and -{wikipedia}/Unix_time[Unix `epoch`] timestamps. For example: - -[source,console,id=cat-health-example] --------------------------------------------------- -GET /_cat/health?v=true --------------------------------------------------- -// TEST[s/^/PUT my-index-000001\n{"settings":{"number_of_replicas": 0}}\n/] - -The API returns the following response: - -[source,txt] --------------------------------------------------- -epoch timestamp cluster status node.total node.data shards pri relo init unassign unassign.pri pending_tasks max_task_wait_time active_shards_percent -1475871424 16:17:04 elasticsearch green 1 1 1 1 0 0 0 0 0 - 100.0% --------------------------------------------------- -// TESTRESPONSE[s/1475871424 16:17:04/\\d+ \\d+:\\d+:\\d+/] -// TESTRESPONSE[s/elasticsearch/[^ ]+/ s/0 -/\\d+ (-|\\d+(\\.\\d+)?[ms]+)/ non_json] - -[[cat-health-api-example-no-timestamp]] -===== Example without a timestamp -You can use the `ts` (timestamps) parameter to disable timestamps. For example: - -[source,console,id=cat-health-no-timestamp-example] --------------------------------------------------- -GET /_cat/health?v=true&ts=false --------------------------------------------------- -// TEST[s/^/PUT my-index-000001\n{"settings":{"number_of_replicas": 0}}\n/] - -The API returns the following response: - -[source,txt] --------------------------------------------------- -cluster status node.total node.data shards pri relo init unassign unassign.pri pending_tasks max_task_wait_time active_shards_percent -elasticsearch green 1 1 1 1 0 0 0 0 0 - 100.0% --------------------------------------------------- -// TESTRESPONSE[s/elasticsearch/[^ ]+/ s/0 -/\\d+ (-|\\d+(\\.\\d+)?[ms]+)/ non_json] - -**Note**: The reported number of unassigned primary shards may be lower than the true value if your cluster contains nodes running a version below 8.16. For a more accurate count in this scenario, please use the <>. 
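The two health requests from the examples above, with and without timestamps. A sketch assuming a local unsecured cluster and the `requests` package.

[source,python]
----
import requests

ES = "http://localhost:9200"  # assumed: local cluster, security disabled

# Health line with both timestamp formats, then the same line without them.
print(requests.get(f"{ES}/_cat/health", params={"v": "true"}).text)
print(requests.get(f"{ES}/_cat/health", params={"v": "true", "ts": "false"}).text)
----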
- -[[cat-health-api-example-across-nodes]] -===== Example across nodes -You can use the cat health API to verify the health of a cluster across nodes. -For example: - -[source,sh] --------------------------------------------------- -% pssh -i -h list.of.cluster.hosts curl -s localhost:9200/_cat/health -[1] 20:20:52 [SUCCESS] es3.vm -1384309218 18:20:18 foo green 3 3 3 3 0 0 0 0 0 -[2] 20:20:52 [SUCCESS] es1.vm -1384309218 18:20:18 foo green 3 3 3 3 0 0 0 0 0 -[3] 20:20:52 [SUCCESS] es2.vm -1384309218 18:20:18 foo green 3 3 3 3 0 0 0 0 0 --------------------------------------------------- -// NOTCONSOLE - -[[cat-health-api-example-large-cluster]] -===== Example with a large cluster -You can use the cat health API to track the recovery of a large cluster over a -longer period of time. You can do this by including the cat health API request -in a delayed loop. For example: - -[source,sh] --------------------------------------------------- -% while true; do curl localhost:9200/_cat/health; sleep 120; done -1384309446 18:24:06 foo red 3 3 20 20 0 0 1812 1121 0 -1384309566 18:26:06 foo yellow 3 3 950 916 0 12 870 421 0 -1384309686 18:28:06 foo yellow 3 3 1328 916 0 12 492 301 0 -1384309806 18:30:06 foo green 3 3 1832 916 4 0 0 0 -^C --------------------------------------------------- -// NOTCONSOLE - -In this example, the recovery took roughly six minutes, from `18:24:06` to -`18:30:06`. If this recovery took hours, you could continue to monitor the -number of `UNASSIGNED` shards, which should drop. If the number of `UNASSIGNED` -shards remains static, it would indicate an issue with the cluster recovery. diff --git a/docs/reference/cat/indices.asciidoc b/docs/reference/cat/indices.asciidoc deleted file mode 100644 index 3397c05f49735..0000000000000 --- a/docs/reference/cat/indices.asciidoc +++ /dev/null @@ -1,134 +0,0 @@ -[[cat-indices]] -=== cat indices API -++++ -cat indices -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns high-level information about <> in a cluster, including backing -indices for <>. - - -[[cat-indices-api-request]] -==== {api-request-title} - -`GET /_cat/indices/` - -`GET /_cat/indices` - -[[cat-indices-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. You must -also have the `monitor` or `manage` <> -for any data stream, index, or alias you retrieve. - -[[cat-indices-api-desc]] -==== {api-description-title} - -Use the cat indices API to get the following information for each index in a -cluster: - -* Shard count -* Document count -* Deleted document count -* Primary store size -* Total store size of all shards, including shard replicas - -These metrics are retrieved directly from -https://lucene.apache.org/core/[Lucene], which {es} uses internally to power -indexing and search. As a result, all document counts include hidden -<> documents. - -To get an accurate count of {es} documents, use the <> or -<> APIs. - -Note that information such as document count, deleted document count and store size are not shown for -indices restored from <> since these indices -do not contain the relevant data structures to retrieve this information from. 
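To make the distinction between Lucene-level and {es}-level counts concrete, a minimal sketch comparing the two for a single index (the index name `my-index-000001` is only an example): `docs.count` in the cat output reflects Lucene documents, including hidden nested documents, while `_count` reports the {es} document count.

[source,console]
--------------------------------------------------
GET /_cat/indices/my-index-000001?v=true&h=index,docs.count,docs.deleted

GET /my-index-000001/_count
--------------------------------------------------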
- - -[[cat-indices-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Comma-separated list of data streams, indices, and aliases -used to limit the request. Supports wildcards (`*`). To target all data streams -and indices, omit this parameter or use `*` or `_all`. - -[[cat-indices-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bytes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -`health`:: -+ --- -(Optional, string) Health status used to limit returned indices. Valid values -are: - -* `green` -* `yellow` -* `red` - -By default, the response includes indices of any health status. --- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=include-unloaded-segments] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[pri-flag]] -`pri` (primary shards):: -(Optional, Boolean) If `true`, the response only includes information from -primary shards. Defaults to `false`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=expand-wildcards] - - -[[cat-indices-api-example]] -==== {api-examples-title} - -[[examples]] -[source,console] --------------------------------------------------- -GET /_cat/indices/my-index-*?v=true&s=index --------------------------------------------------- -// TEST[setup:my_index_huge] -// TEST[s/^/PUT my-index-000002\n{"settings": {"number_of_replicas": 0}}\n/] - -The API returns the following response: - -[source,txt] --------------------------------------------------- -health status index uuid pri rep docs.count docs.deleted store.size pri.store.size dataset.size -yellow open my-index-000001 u8FNjxh8Rfy_awN11oDKYQ 1 1 1200 0 88.1kb 88.1kb 88.1kb -green open my-index-000002 nYFWZEO7TUiOjLQXBaYJpA 1 0 0 0 260b 260b 260b --------------------------------------------------- -// TESTRESPONSE[s/\d+(\.\d+)?[tgmk]?b/\\d+(\\.\\d+)?[tgmk]?b/] -// TESTRESPONSE[s/u8FNjxh8Rfy_awN11oDKYQ|nYFWZEO7TUiOjLQXBaYJpA/.+/ non_json] diff --git a/docs/reference/cat/master.asciidoc b/docs/reference/cat/master.asciidoc deleted file mode 100644 index 4ac40ff50be60..0000000000000 --- a/docs/reference/cat/master.asciidoc +++ /dev/null @@ -1,84 +0,0 @@ -[[cat-master]] -=== cat master API -++++ -cat master -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns information about the <>, including the ID, bound IP address, -and name. - - -[[cat-master-api-request]] -==== {api-request-title} - -`GET /_cat/master` - -[[cat-master-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. 
- -[[cat-master-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=local] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-master-api-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET /_cat/master?v=true --------------------------------------------------- - -The API returns the following response: - -[source,txt] --------------------------------------------------- -id host ip node -YzWoH_2BT-6UjVGDyPdqYg 127.0.0.1 127.0.0.1 YzWoH_2 --------------------------------------------------- -// TESTRESPONSE[s/YzWoH_2.+/.+/ non_json] - -This information is also available via the `nodes` command, but this -is slightly shorter when all you want to do, for example, is verify -all nodes agree on the master: - -[source,sh] --------------------------------------------------- -% pssh -i -h list.of.cluster.hosts curl -s localhost:9200/_cat/master -[1] 19:16:37 [SUCCESS] es3.vm -Ntgn2DcuTjGuXlhKDUD4vA 192.168.56.30 H5dfFeA -[2] 19:16:37 [SUCCESS] es2.vm -Ntgn2DcuTjGuXlhKDUD4vA 192.168.56.30 H5dfFeA -[3] 19:16:37 [SUCCESS] es1.vm -Ntgn2DcuTjGuXlhKDUD4vA 192.168.56.30 H5dfFeA --------------------------------------------------- -// NOTCONSOLE diff --git a/docs/reference/cat/nodeattrs.asciidoc b/docs/reference/cat/nodeattrs.asciidoc deleted file mode 100644 index 6c8093846030c..0000000000000 --- a/docs/reference/cat/nodeattrs.asciidoc +++ /dev/null @@ -1,139 +0,0 @@ -[[cat-nodeattrs]] -=== cat nodeattrs API -++++ -cat nodeattrs -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns information about <>. - -[[cat-nodeattrs-api-request]] -==== {api-request-title} - -`GET /_cat/nodeattrs` - -[[cat-nodeattrs-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cat-nodeattrs-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] -+ --- -If you do not specify which columns to include, the API returns the default columns in the order listed below. If you explicitly specify one or more columns, it only returns the specified columns. - -Valid columns are: - -`node`,`name`:: -(Default) Name of the node, such as `DKDM97B`. - -`host`, `h`:: -(Default) Host name, such as `n1`. - -`ip`, `i`:: -(Default) IP address, such as `127.0.1.1`. - -`attr`, `attr.name`:: -(Default) Attribute name, such as `rack`. - -`value`, `attr.value`:: -(Default) Attribute value, such as `rack123`. - -`id`, `nodeId`:: -ID of the node, such as `k0zy`. - -`pid`, `p`:: -Process ID, such as `13061`. - -`port`, `po`:: -Bound transport port, such as `9300`. 
--- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=local] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-nodeattrs-api-example]] -==== {api-examples-title} - -[[cat-nodeattrs-api-ex-default]] -===== Example with default columns - -[source,console] --------------------------------------------------- -GET /_cat/nodeattrs?v=true --------------------------------------------------- -// TEST[s/\?v=true/\?v=true&s=node,attr/] -// Sort the resulting attributes so we can assert on them more easily - -The API returns the following response: - -[source,txt] --------------------------------------------------- -node host ip attr value -... -node-0 127.0.0.1 127.0.0.1 testattr test -... --------------------------------------------------- -// TESTRESPONSE[s/\.\.\.\n$/\n(.+ xpack\\.installed true\n)?\n/] -// TESTRESPONSE[s/\.\.\.\n/(.+ ml\\..+\n)*/ non_json] -// If xpack is not installed then neither ... with match anything -// If xpack is installed then the first ... contains ml attributes -// and the second contains xpack.installed=true - -The `node`, `host`, and `ip` columns provide basic information about each node. -The `attr` and `value` columns return custom node attributes, one per line. - -[[cat-nodeattrs-api-ex-headings]] -===== Example with explicit columns - -The following API request returns the `name`, `pid`, `attr`, and `value` -columns. - -[source,console] --------------------------------------------------- -GET /_cat/nodeattrs?v=true&h=name,pid,attr,value --------------------------------------------------- -// TEST[s/,value/,value&s=node,attr/] -// Sort the resulting attributes so we can assert on them more easily - -The API returns the following response: - -[source,txt] --------------------------------------------------- -name pid attr value -... -node-0 19566 testattr test -... --------------------------------------------------- -// TESTRESPONSE[s/19566/\\d*/] -// TESTRESPONSE[s/\.\.\.\n$/\n(.+ xpack\\.installed true\n)?\n/] -// TESTRESPONSE[s/\.\.\.\n/(.+ ml\\..+\n)*/ non_json] -// If xpack is not installed then neither ... with match anything -// If xpack is installed then the first ... contains ml attributes -// and the second contains xpack.installed=true \ No newline at end of file diff --git a/docs/reference/cat/nodes.asciidoc b/docs/reference/cat/nodes.asciidoc deleted file mode 100644 index a5a813e8d37d5..0000000000000 --- a/docs/reference/cat/nodes.asciidoc +++ /dev/null @@ -1,398 +0,0 @@ -[[cat-nodes]] -=== cat nodes API - -++++ -cat nodes -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. -They are _not_ intended for use by applications. -For application consumption, use the <>. -==== - -Returns information about a <>. - -[[cat-nodes-api-request]] -==== {api-request-title} - -`GET /_cat/nodes` - -[[cat-nodes-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. 
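Before the full column reference below, a minimal sketch of how the `h` (columns) and `s` (sort) parameters are typically combined for this API; every column named here appears in the list that follows, and the sort column is only an example.

[source,console]
--------------------------------------------------
GET /_cat/nodes?v=true&h=name,node.role,master,heap.percent,disk.used_percent&s=name
--------------------------------------------------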
- -[[cat-nodes-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bytes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -`full_id`:: -(Optional, Boolean) If `true`, return the full node ID. -If `false`, return the shortened node ID. -Defaults to `false`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] -+ --- -If you do not specify which columns to include, the API returns the default columns in the order listed below. -If you explicitly specify one or more columns, it only returns the specified columns. - -Valid columns are: - -`ip`, `i`:: -(Default) IP address, such as `127.0.1.1`. - -`heap.percent`, `hp`, `heapPercent`:: -(Default) Used percentage of total allocated Elasticsearch JVM heap, such as `7`. This reflects only the {es} process running within the operating system and is the most direct indicator of its JVM/heap/memory resource performance. - -`heap.max`, `hm`, `heapMax`:: -Total heap, such as `4gb`. - -`ram.percent`, `rp`, `ramPercent`:: -(Default) Used percentage of total operating system's memory, such as `47`. This reflects all processes running on operating system instead of only {es} and is not guaranteed to correlate to its performance. - -`file_desc.percent`, `fdp`, `fileDescriptorPercent`:: -Used file descriptors percentage, such as `1`. - -`node.role`, `r`, `role`, `nodeRole`:: -(Default) Roles of the node. -Returned values include -`c` (cold node), -`d` (data node), -`f` (frozen node), -`h` (hot node), -`i` (ingest node), -`l` (machine learning node), -`m` (master-eligible node), -`r` (remote cluster client node), -`s` (content node), -`t` ({transform} node), -`v` (voting-only node), -`w` (warm node), and -`-` (coordinating node only). -+ -For example, `dim` indicates a master-eligible data and ingest node. -See -<>. - -`master`, `m`:: -(Default) Indicates whether the node is the elected master node. -Returned values include `*` (elected master) and `-` (not elected master). - -`name`, `n`:: -(Default) Node name, such as `I8hydUG`. - -`id`, `nodeId`:: -ID of the node, such as `k0zy`. - -`pid`, `p`:: -Process ID, such as `13061`. - -`port`, `po`:: -Bound transport port, such as `9300`. - -`http_address`, `http`:: -Bound http address, such as `127.0.0.1:9200`. - -`version`, `v`:: -Elasticsearch version, such as {version}. - -`build`, `b`:: -Elasticsearch build hash, such as `5c03844`. - -`jdk`, `j`:: -Java version, such as `1.8.0`. - -`disk.total`, `dt`, `diskTotal`:: -Total disk space, such as `458.3gb`. - -`disk.used`, `du`, `diskUsed`:: -Used disk space, such as `259.8gb`. - -`disk.avail`, `d`, `disk`, `diskAvail`:: -Available disk space, such as `198.4gb`. - -`disk.used_percent`, `dup`, `diskUsedPercent`:: -Used disk space percentage, such as `47`. - -`heap.current`, `hc`, `heapCurrent`:: -Used heap, such as `311.2mb`. - -`ram.current`,`rc`, `ramCurrent`:: -Used total memory, such as `513.4mb`. - -`ram.max`, `rm`, `ramMax`:: -Total memory, such as `2.9gb`. - -`file_desc.current`, `fdc`, `fileDescriptorCurrent`:: -Used file descriptors, such as `123`. - -`file_desc.max`, `fdm`, `fileDescriptorMax`:: -Maximum number of file descriptors, such as `1024`. - -`cpu`:: -(Default) Recent system CPU usage as percent, such as `12`. - -`load_1m`, `l`:: -(Default) Most recent load average, such as `0.22`. - -`load_5m`, `l`:: -(Default) Load average for the last five minutes, such as `0.78`. 
- -`load_15m`, `l`:: -(Default) Load average for the last fifteen minutes, such as `1.24`. - -`uptime`, `u`:: -Node uptime, such as `17.3m`. - -`completion.size`, `cs`, `completionSize`:: -Size of completion, such as `0b`. - -`fielddata.memory_size`, `fm`, `fielddataMemory`:: -Used fielddata cache memory, such as `0b`. - -`fielddata.evictions`, `fe`, `fielddataEvictions`:: -Fielddata cache evictions, such as `0`. - -`query_cache.memory_size`, `qcm`, `queryCacheMemory`:: -Used query cache memory, such as `0b`. - -`query_cache.evictions`, `qce`, `queryCacheEvictions`:: -Query cache evictions, such as `0`. - -`query_cache.hit_count`, `qchc`, `queryCacheHitCount`:: -Query cache hit count, such as `0`. - -`query_cache.miss_count`, `qcmc`, `queryCacheMissCount`:: -Query cache miss count, such as `0`. - -`request_cache.memory_size`, `rcm`, `requestCacheMemory`:: -Used request cache memory, such as `0b`. - -`request_cache.evictions`, `rce`, `requestCacheEvictions`:: -Request cache evictions, such as `0`. - -`request_cache.hit_count`, `rchc`, `requestCacheHitCount`:: -Request cache hit count, such as `0`. - -`request_cache.miss_count`, `rcmc`, `requestCacheMissCount`:: -Request cache miss count, such as `0`. - -`flush.total`, `ft`, `flushTotal`:: -Number of flushes, such as `1`. - -`flush.total_time`, `ftt`, `flushTotalTime`:: -Time spent in flush, such as `1`. - -`get.current`, `gc`, `getCurrent`:: -Number of current get operations, such as `0`. - -`get.time`, `gti`, `getTime`:: -Time spent in get, such as `14ms`. - -`get.total`, `gto`, `getTotal`:: -Number of get operations, such as `2`. - -`get.exists_time`, `geti`, `getExistsTime`:: -Time spent in successful gets, such as `14ms`. - -`get.exists_total`, `geto`, `getExistsTotal`:: -Number of successful get operations, such as `2`. - -`get.missing_time`, `gmti`, `getMissingTime`:: -Time spent in failed gets, such as `0s`. - -`get.missing_total`, `gmto`, `getMissingTotal`:: -Number of failed get operations, such as `1`. - -`indexing.delete_current`, `idc`, `indexingDeleteCurrent`:: -Number of current deletion operations, such as `0`. - -`indexing.delete_time`, `idti`, `indexingDeleteTime`:: -Time spent in deletions, such as `2ms`. - -`indexing.delete_total`, `idto`, `indexingDeleteTotal`:: -Number of deletion operations, such as `2`. - -`indexing.index_current`, `iic`, `indexingIndexCurrent`:: -Number of current indexing operations, such as `0`. - -`indexing.index_time`, `iiti`, `indexingIndexTime`:: -Time spent in indexing, such as `134ms`. - -`indexing.index_total`, `iito`, `indexingIndexTotal`:: -Number of indexing operations, such as `1`. - -`indexing.index_failed`, `iif`, `indexingIndexFailed`:: -Number of failed indexing operations, such as `0`. - -`indexing.index_failed_due_to_version_conflict`, `iifvc`, `indexingIndexFailedDueToVersionConflict`:: -Number of failed indexing operations due to version conflict, such as `0`. - -`merges.current`, `mc`, `mergesCurrent`:: -Number of current merge operations, such as `0`. - -`merges.current_docs`, `mcd`, `mergesCurrentDocs`:: -Number of current merging documents, such as `0`. - -`merges.current_size`, `mcs`, `mergesCurrentSize`:: -Size of current merges, such as `0b`. - -`merges.total`, `mt`, `mergesTotal`:: -Number of completed merge operations, such as `0`. - -`merges.total_docs`, `mtd`, `mergesTotalDocs`:: -Number of merged documents, such as `0`. - -`merges.total_size`, `mts`, `mergesTotalSize`:: -Size of current merges, such as `0b`. 
- -`merges.total_time`, `mtt`, `mergesTotalTime`:: -Time spent merging documents, such as `0s`. - -`refresh.total`, `rto`, `refreshTotal`:: -Number of refreshes, such as `16`. - -`refresh.time`, `rti`, `refreshTime`:: -Time spent in refreshes, such as `91ms`. - -`script.compilations`, `scrcc`, `scriptCompilations`:: -Total script compilations, such as `17`. - -`script.cache_evictions`, `scrce`, `scriptCacheEvictions`:: -Total compiled scripts evicted from cache, such as `6`. - -`search.fetch_current`, `sfc`, `searchFetchCurrent`:: -Current fetch phase operations, such as `0`. - -`search.fetch_time`, `sfti`, `searchFetchTime`:: -Time spent in fetch phase, such as `37ms`. - -`search.fetch_total`, `sfto`, `searchFetchTotal`:: -Number of fetch operations, such as `7`. - -`search.open_contexts`, `so`, `searchOpenContexts`:: -Open search contexts, such as `0`. - -`search.query_current`, `sqc`, `searchQueryCurrent`:: -Current query phase operations, such as `0`. - -`search.query_time`, `sqti`, `searchQueryTime`:: -Time spent in query phase, such as `43ms`. - -`search.query_total`, `sqto`, `searchQueryTotal`:: -Number of query operations, such as `9`. - -`search.scroll_current`, `scc`, `searchScrollCurrent`:: -Open scroll contexts, such as `2`. - -`search.scroll_time`, `scti`, `searchScrollTime`:: -Time scroll contexts held open, such as `2m`. - -`search.scroll_total`, `scto`, `searchScrollTotal`:: -Completed scroll contexts, such as `1`. - -`segments.count`, `sc`, `segmentsCount`:: -Number of segments, such as `4`. - -`segments.memory`, `sm`, `segmentsMemory`:: -Memory used by segments, such as `1.4kb`. - -`segments.index_writer_memory`, `siwm`, `segmentsIndexWriterMemory`:: -Memory used by index writer, such as `18mb`. - -`segments.version_map_memory`, `svmm`, `segmentsVersionMapMemory`:: -Memory used by version map, such as `1.0kb`. - -`segments.fixed_bitset_memory`, `sfbm`, `fixedBitsetMemory`:: -Memory used by fixed bit sets for nested object field types and type filters for types referred in <> fields, such as `1.0kb`. - -`suggest.current`, `suc`, `suggestCurrent`:: -Number of current suggest operations, such as `0`. - -`suggest.time`, `suti`, `suggestTime`:: -Time spent in suggest, such as `0`. - -`suggest.total`, `suto`, `suggestTotal`:: -Number of suggest operations, such as `0`. - -`shard_stats.total_count`, `sstc`, `shards`, `shardStatsTotalCount`:: -Number of shards assigned. - -`mappings.total_count`, `mtc`, `mappingsTotalCount`:: -Number of mappings, including <> and <> fields. - -`mappings.total_estimated_overhead_in_bytes`, `mteo`, `mappingsTotalEstimatedOverheadInBytes`:: -Estimated heap overhead, in bytes, of mappings on this node, which allows for 1KiB of heap for every mapped field. 
--- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=include-unloaded-segments] - -[[cat-nodes-api-example]] -==== {api-examples-title} - -[[cat-nodes-api-ex-default]] -===== Example with default columns - -[source,console] --------------------------------------------------- -GET /_cat/nodes?v=true --------------------------------------------------- - -The API returns the following response: - -[source,txt] --------------------------------------------------- -ip heap.percent ram.percent cpu load_1m load_5m load_15m node.role master name -127.0.0.1 65 99 42 3.07 dim * mJw06l1 --------------------------------------------------- -// TESTRESPONSE[s/3.07/(\\d+\\.\\d+( \\d+\\.\\d+ (\\d+\\.\\d+)?)?)?/] -// TESTRESPONSE[s/65 99 42/\\d+ \\d+ \\d+/] -// TESTRESPONSE[s/dim/.+/ s/[*]/[*]/ s/mJw06l1/.+/ non_json] - -The `ip`, `heap.percent`, `ram.percent`, `cpu`, and `load_*` columns provide the IP addresses and performance information of each node. - -The `node.role`, `master`, and `name` columns provide information useful for monitoring an entire cluster, particularly large ones. - -[[cat-nodes-api-ex-headings]] -===== Example with explicit columns - -The following API request returns the `id`, `ip`, `port`, `v` (version), and `m` -(master) columns. - -[source,console] --------------------------------------------------- -GET /_cat/nodes?v=true&h=id,ip,port,v,m --------------------------------------------------- - -The API returns the following response: - -["source","txt",subs="attributes,callouts"] --------------------------------------------------- -id ip port v m -veJR 127.0.0.1 59938 {version} * --------------------------------------------------- -// TESTRESPONSE[s/veJR/.+/ s/59938/\\d+/ s/[*]/[*]/ non_json] diff --git a/docs/reference/cat/pending_tasks.asciidoc b/docs/reference/cat/pending_tasks.asciidoc deleted file mode 100644 index 081a74da07552..0000000000000 --- a/docs/reference/cat/pending_tasks.asciidoc +++ /dev/null @@ -1,76 +0,0 @@ -[[cat-pending-tasks]] -=== cat pending tasks API -++++ -cat pending tasks -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns <> that have not yet been executed. - -[[cat-pending-tasks-api-request]] -==== {api-request-title} - -`GET /_cat/pending_tasks` - -[[cat-pending-tasks-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. 
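As a usage sketch mirroring the polling pattern shown for cat health, you can watch the pending task queue drain from the command line; the host and interval are placeholders to adjust for your environment.

[source,sh]
--------------------------------------------------
% while true; do curl -s "localhost:9200/_cat/pending_tasks?v=true"; sleep 5; done
--------------------------------------------------
// NOTCONSOLE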
- -[[cat-pending-tasks-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=local] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-pending-tasks-api-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET /_cat/pending_tasks?v=true --------------------------------------------------- - -The API returns the following response: - -[source,txt] --------------------------------------------------- -insertOrder timeInQueue priority source - 1685 855ms HIGH update-mapping [foo][t] - 1686 843ms HIGH update-mapping [foo][t] - 1693 753ms HIGH refresh-mapping [foo][[t]] - 1688 816ms HIGH update-mapping [foo][t] - 1689 802ms HIGH update-mapping [foo][t] - 1690 787ms HIGH update-mapping [foo][t] - 1691 773ms HIGH update-mapping [foo][t] --------------------------------------------------- -// TESTRESPONSE[s/(\n.+)+/(\\n.+)*/ non_json] -// We can't assert anything about the tasks in progress here because we don't -// know what might be in progress.... diff --git a/docs/reference/cat/plugins.asciidoc b/docs/reference/cat/plugins.asciidoc deleted file mode 100644 index c4d830ee52a76..0000000000000 --- a/docs/reference/cat/plugins.asciidoc +++ /dev/null @@ -1,80 +0,0 @@ -[[cat-plugins]] -=== cat plugins API -++++ -cat plugins -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns a list of <> running on each node of a cluster. - - -[[cat-plugins-api-request]] -==== {api-request-title} - -`GET /_cat/plugins` - -[[cat-plugins-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. 
- -[[cat-plugins-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=local] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-plugins-api-example]] -==== {api-examples-title} - -[source,console] ------------------------------------------------------------------------------- -GET /_cat/plugins?v=true&s=component&h=name,component,version,description ------------------------------------------------------------------------------- - -The API returns the following response: - -["source","txt",subs="attributes,callouts"] ------------------------------------------------------------------------------- -name component version description -U7321H6 analysis-icu {version_qualified} The ICU Analysis plugin integrates the Lucene ICU module into Elasticsearch, adding ICU-related analysis components. -U7321H6 analysis-kuromoji {version_qualified} The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. -U7321H6 analysis-nori {version_qualified} The Korean (nori) Analysis plugin integrates Lucene nori analysis module into elasticsearch. -U7321H6 analysis-phonetic {version_qualified} The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. -U7321H6 analysis-smartcn {version_qualified} Smart Chinese Analysis plugin integrates Lucene Smart Chinese analysis module into elasticsearch. -U7321H6 analysis-stempel {version_qualified} The Stempel (Polish) Analysis plugin integrates Lucene stempel (polish) analysis module into elasticsearch. -U7321H6 analysis-ukrainian {version_qualified} The Ukrainian Analysis plugin integrates the Lucene UkrainianMorfologikAnalyzer into elasticsearch. -U7321H6 discovery-azure-classic {version_qualified} The Azure Classic Discovery plugin allows to use Azure Classic API for the unicast discovery mechanism -U7321H6 discovery-ec2 {version_qualified} The EC2 discovery plugin allows to use AWS API for the unicast discovery mechanism. -U7321H6 discovery-gce {version_qualified} The Google Compute Engine (GCE) Discovery plugin allows to use GCE API for the unicast discovery mechanism. -U7321H6 mapper-annotated-text {version_qualified} The Mapper Annotated_text plugin adds support for text fields with markup used to inject annotation tokens into the index. -U7321H6 mapper-murmur3 {version_qualified} The Mapper Murmur3 plugin allows to compute hashes of a field's values at index-time and to store them in the index. -U7321H6 mapper-size {version_qualified} The Mapper Size plugin allows document to record their uncompressed size at index time. -U7321H6 store-smb {version_qualified} The Store SMB plugin adds support for SMB stores. 
------------------------------------------------------------------------------- -// TESTRESPONSE[s/([.()])/\\$1/ s/U7321H6/.+/ non_json] diff --git a/docs/reference/cat/recovery.asciidoc b/docs/reference/cat/recovery.asciidoc deleted file mode 100644 index 9df46f6fe93f6..0000000000000 --- a/docs/reference/cat/recovery.asciidoc +++ /dev/null @@ -1,160 +0,0 @@ -[[cat-recovery]] -=== cat recovery API -++++ -cat recovery -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns information about ongoing and completed <>. - -For <>, the API returns information about the stream's backing -indices. - -[[cat-recovery-api-request]] -==== {api-request-title} - -`GET /_cat/recovery/` - -`GET /_cat/recovery` - -[[cat-recovery-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. You must -also have the `monitor` or `manage` <> -for any data stream, index, or alias you retrieve. - -[[cat-recovery-api-desc]] -==== {api-description-title} - -The cat recovery API returns information about shard recoveries, both -ongoing and completed. It is a more compact view of the JSON -<> API. - -include::{es-ref-dir}/modules/shard-recovery-desc.asciidoc[] - - -[[cat-recovery-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Comma-separated list of data streams, indices, and aliases -used to limit the request. Supports wildcards (`*`). To target all data streams -and indices, omit this parameter or use `*` or `_all`. 
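A minimal sketch ahead of the parameter reference: the `active_only` flag (documented below) restricts the output to recoveries that are still running, which is usually what you want when watching a relocation or restore in progress; the column selection here is only an example.

[source,console]
--------------------------------------------------
GET /_cat/recovery?v=true&active_only=true&h=index,shard,time,type,stage,source_node,target_node
--------------------------------------------------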
- -[[cat-recovery-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=active-only] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bytes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=detailed] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=index-query-parm] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-recovery-api-example]] -==== {api-examples-title} - -[[cat-recovery-api-ex-dead]] -===== Example with no ongoing recoveries - -[source,console] ----------------------------------------------------------------------------- -GET _cat/recovery?v=true ----------------------------------------------------------------------------- -// TEST[setup:my_index] - -The API returns the following response: - -[source,txt] ---------------------------------------------------------------------------- -index shard time type stage source_host source_node target_host target_node repository snapshot files files_recovered files_percent files_total bytes bytes_recovered bytes_percent bytes_total translog_ops translog_ops_recovered translog_ops_percent -my-index-000001 0 13ms store done n/a n/a 127.0.0.1 node-0 n/a n/a 0 0 100% 13 0b 0b 100% 9928b 0 0 100.0% ---------------------------------------------------------------------------- -// TESTRESPONSE[s/store/empty_store/] -// TESTRESPONSE[s/100%/0.0%/] -// TESTRESPONSE[s/9928b/0b/] -// TESTRESPONSE[s/13ms/[0-9.]+m?s/] -// TESTRESPONSE[s/13/\\d+/ non_json] - -In this example response, the source and target nodes are the same because the -recovery type is `store`, meaning they were read from local storage on node -start. - -[[cat-recovery-api-ex-live]] -===== Example with a live shard recovery - -By increasing the replica count of an index and bringing another node online to -host the replicas, you can retrieve information about an ongoing recovery. - -[source,console] ----------------------------------------------------------------------------- -GET _cat/recovery?v=true&h=i,s,t,ty,st,shost,thost,f,fp,b,bp ----------------------------------------------------------------------------- -// TEST[setup:my_index] - -The API returns the following response: - -[source,txt] ----------------------------------------------------------------------------- -i s t ty st shost thost f fp b bp -my-index-000001 0 1252ms peer done 192.168.1.1 192.168.1.2 0 100.0% 0b 100.0% ----------------------------------------------------------------------------- -// TESTRESPONSE[s/peer/empty_store/] -// TESTRESPONSE[s/192.168.1.2/127.0.0.1/] -// TESTRESPONSE[s/192.168.1.1/n\/a/] -// TESTRESPONSE[s/100.0%/0.0%/] -// TESTRESPONSE[s/1252ms/[0-9.]+m?s/ non_json] - -In this example response, the recovery type is `peer`, meaning the shard -recovered from another node. The returned files and bytes are real-time -measurements. - -[[cat-recovery-api-ex-snapshot]] -===== Example with a snapshot recovery - -You can restore backups of an index using the <> API. You can use the cat recovery API to retrieve information about a -snapshot recovery.
- -[source,console] --------------------------------------------------------------------------------- -GET _cat/recovery?v=true&h=i,s,t,ty,st,rep,snap,f,fp,b,bp --------------------------------------------------------------------------------- -// TEST[skip:no need to execute snapshot/restore here] - -The API returns the following response with a recovery type of `snapshot`: - -[source,txt] --------------------------------------------------------------------------------- -i s t ty st rep snap f fp b bp -my-index-000001 0 1978ms snapshot done my-repo snap-1 79 8.0% 12086 9.0% --------------------------------------------------------------------------------- -// TESTRESPONSE[non_json] \ No newline at end of file diff --git a/docs/reference/cat/repositories.asciidoc b/docs/reference/cat/repositories.asciidoc deleted file mode 100644 index 7e6283336e17a..0000000000000 --- a/docs/reference/cat/repositories.asciidoc +++ /dev/null @@ -1,70 +0,0 @@ -[[cat-repositories]] -=== cat repositories API -++++ -cat repositories -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns the <> for a cluster. - - -[[cat-repositories-api-request]] -==== {api-request-title} - -`GET /_cat/repositories` - -[[cat-repositories-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the -`monitor_snapshot`, `create_snapshot`, or `manage` -<> to use this API. - -[[cat-repositories-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=local] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-repositories-api-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET /_cat/repositories?v=true --------------------------------------------------- -// TEST[s/^/PUT \/_snapshot\/repo1\n{"type": "fs", "settings": {"location": "repo\/1"}}\n/] - -The API returns the following response: - -[source,txt] --------------------------------------------------- -id type -repo1 fs -repo2 s3 --------------------------------------------------- -// TESTRESPONSE[s/\nrepo2 s3// non_json] diff --git a/docs/reference/cat/segments.asciidoc b/docs/reference/cat/segments.asciidoc deleted file mode 100644 index 70b5e597eb95f..0000000000000 --- a/docs/reference/cat/segments.asciidoc +++ /dev/null @@ -1,145 +0,0 @@ -[[cat-segments]] -=== cat segments API -++++ -cat segments -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. 
-==== - -Returns low-level information about the https://lucene.apache.org/core/[Lucene] -segments in index shards. - -For <>, the API returns information about the stream's backing -indices. - -[[cat-segments-api-request]] -==== {api-request-title} - -`GET /_cat/segments/` - -`GET /_cat/segments` - -[[cat-segments-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. You must -also have the `monitor` or `manage` <> -for any data stream, index, or alias you retrieve. - -[[cat-segments-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Comma-separated list of data streams, indices, and aliases -used to limit the request. Supports wildcards (`*`). To target all data streams -and indices, omit this parameter or use `*` or `_all`. - -[[cat-segments-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bytes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] -+ --- -If you do not specify which columns to include, the API returns the default -columns in the order listed below. If you explicitly specify one or more -columns, it only returns the specified columns. - -Valid columns are: - -`index`, `i`, `idx`:: -(Default) Name of the index. - -`shard`, `s`, `sh`:: -(Default) Name of the shard. - -`prirep`, `p`, `pr`, `primaryOrReplica`:: -(Default) Shard type. Returned values are `primary` or `replica`. - -`ip`:: -(Default) IP address of the segment's shard, such as `127.0.1.1`. - -`segment`:: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=segment] - -`generation`:: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=generation] - -`docs.count`:: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=docs-count] - -`docs.deleted`:: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=docs-deleted] - -`size`:: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=segment-size] - -`size.memory`:: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=memory] - -`committed`:: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=committed] - -`searchable`:: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=segment-search] - -`version`:: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=segment-version] - -`compound`:: -(Default) If `true`, the segment is stored in a compound file. This means Lucene -merged all files from the segment in a single file to save file descriptors. - -`id`:: -ID of the node, such as `k0zy`. 
--- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-segments-api-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET /_cat/segments?v=true --------------------------------------------------- -// TEST[s/^/PUT \/test\/_doc\/1?refresh\n{"test":"test"}\nPUT \/test1\/_doc\/1?refresh\n{"test":"test"}\n/] - -The API returns the following response: - -["source","txt",subs="attributes,callouts"] --------------------------------------------------- -index shard prirep ip segment generation docs.count docs.deleted size size.memory committed searchable version compound -test 0 p 127.0.0.1 _0 0 1 0 3kb 0 false true {lucene_version} true -test1 0 p 127.0.0.1 _0 0 1 0 3kb 0 false true {lucene_version} true --------------------------------------------------- -// TESTRESPONSE[s/3kb/\\d+(\\.\\d+)?[mk]?b/ s/2042/\\d+/ non_json] diff --git a/docs/reference/cat/shards.asciidoc b/docs/reference/cat/shards.asciidoc deleted file mode 100644 index 2d3859e74c87e..0000000000000 --- a/docs/reference/cat/shards.asciidoc +++ /dev/null @@ -1,428 +0,0 @@ -[[cat-shards]] -=== cat shards API - -++++ -cat shards -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. -They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -The `shards` command is the detailed view of all nodes' shard <>. -It will tell you if the shard is a primary or replica, the number of docs, the -bytes it takes on disk, the node where it's located, and if the shard is -currently <>. - -For <>, the API returns information about the stream's backing indices. - -[[cat-shards-api-request]] -==== {api-request-title} - -`GET /_cat/shards/` - -`GET /_cat/shards` - -[[cat-shards-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. -You must also have the `monitor` or `manage` <> -to view the full information for any data stream, index, or alias you retrieve. - -[[cat-shards-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Comma-separated list of data streams, indices, and aliases used to limit the request. -Supports wildcards (`*`). -To target all data streams and indices, omit this parameter or use `*` or `_all`. - -[[cat-shards-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bytes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] -+ --- -If you do not specify which columns to include, the API returns the default columns in the order listed below. -If you explicitly specify one or more columns, it only returns the specified columns. - -Valid columns are: - -`index`, `i`, `idx`:: -(Default) Name of the index. - -`shard`, `s`, `sh`:: -(Default) Name of the shard. - -`prirep`, `p`, `pr`, `primaryOrReplica`:: -(Default) Shard type. -Returned values are `primary` or `replica`. - -`state`, `st`:: -(Default) State of the shard. -Returned values are: -+ -* `INITIALIZING`: The shard is recovering from a peer shard or gateway. 
-* `RELOCATING`: The shard is relocating. -* `STARTED`: The shard has started. -* `UNASSIGNED`: The shard is not assigned to any node. - -`docs`, `d`, `dc`:: -(Default) Number of documents in shard, such as `25`. - -`store`, `sto`:: -(Default) Disk space used by the shard, such as `5kb`. - -`dataset.size`:: -(Default) Disk space used by the shard's dataset, which may or may not be the size on disk, but includes space used by the shard on object storage. -Reported as a size value such as `5kb`. - -`ip`:: -(Default) IP address of the node, such as `127.0.1.1`. - -`id`:: -(Default) ID of the node, such as `k0zy`. - -`node`, `n`:: -(Default) Node name, such as `I8hydUG`. - -`completion.size`, `cs`, `completionSize`:: -Size of completion, such as `0b`. - -`dense_vector.value_count`, `dvc`, `denseVectorCount`:: -Number of indexed dense vectors. - -`fielddata.memory_size`, `fm`, `fielddataMemory`:: -Used fielddata cache memory, such as `0b`. - -`fielddata.evictions`, `fe`, `fielddataEvictions`:: -Fielddata cache evictions, such as `0`. - -`flush.total`, `ft`, `flushTotal`:: -Number of flushes, such as `1`. - -`flush.total_time`, `ftt`, `flushTotalTime`:: -Time spent in flush, such as `1`. - -`get.current`, `gc`, `getCurrent`:: -Number of current get operations, such as `0`. - -`get.time`, `gti`, `getTime`:: -Time spent in get, such as `14ms`. - -`get.total`, `gto`, `getTotal`:: -Number of get operations, such as `2`. - -`get.exists_time`, `geti`, `getExistsTime`:: -Time spent in successful gets, such as `14ms`. - -`get.exists_total`, `geto`, `getExistsTotal`:: -Number of successful get operations, such as `2`. - -`get.missing_time`, `gmti`, `getMissingTime`:: -Time spent in failed gets, such as `0s`. - -`get.missing_total`, `gmto`, `getMissingTotal`:: -Number of failed get operations, such as `1`. - -`indexing.delete_current`, `idc`, `indexingDeleteCurrent`:: -Number of current deletion operations, such as `0`. - -`indexing.delete_time`, `idti`, `indexingDeleteTime`:: -Time spent in deletions, such as `2ms`. - -`indexing.delete_total`, `idto`, `indexingDeleteTotal`:: -Number of deletion operations, such as `2`. - -`indexing.index_current`, `iic`, `indexingIndexCurrent`:: -Number of current indexing operations, such as `0`. - -`indexing.index_time`, `iiti`, `indexingIndexTime`:: -Time spent in indexing, such as `134ms`. - -`indexing.index_total`, `iito`, `indexingIndexTotal`:: -Number of indexing operations, such as `1`. - -`indexing.index_failed`, `iif`, `indexingIndexFailed`:: -Number of failed indexing operations, such as `0`. - -`indexing.index_failed_due_to_version_conflict`, `iifvc`, `indexingIndexFailedDueToVersionConflict`:: -Number of failed indexing operations due to version conflict, such as `0`. - -`merges.current`, `mc`, `mergesCurrent`:: -Number of current merge operations, such as `0`. - -`merges.current_docs`, `mcd`, `mergesCurrentDocs`:: -Number of current merging documents, such as `0`. - -`merges.current_size`, `mcs`, `mergesCurrentSize`:: -Size of current merges, such as `0b`. - -`merges.total`, `mt`, `mergesTotal`:: -Number of completed merge operations, such as `0`. - -`merges.total_docs`, `mtd`, `mergesTotalDocs`:: -Number of merged documents, such as `0`. - -`merges.total_size`, `mts`, `mergesTotalSize`:: -Size of current merges, such as `0b`. - -`merges.total_time`, `mtt`, `mergesTotalTime`:: -Time spent merging documents, such as `0s`. - -`query_cache.memory_size`, `qcm`, `queryCacheMemory`:: -Used query cache memory, such as `0b`. 
- -`query_cache.evictions`, `qce`, `queryCacheEvictions`:: -Query cache evictions, such as `0`. - -`recoverysource.type`, `rs`:: -Type of recovery source. - -`refresh.total`, `rto`, `refreshTotal`:: -Number of refreshes, such as `16`. - -`refresh.time`, `rti`, `refreshTime`:: -Time spent in refreshes, such as `91ms`. - -`search.fetch_current`, `sfc`, `searchFetchCurrent`:: -Current fetch phase operations, such as `0`. - -`search.fetch_time`, `sfti`, `searchFetchTime`:: -Time spent in fetch phase, such as `37ms`. - -`search.fetch_total`, `sfto`, `searchFetchTotal`:: -Number of fetch operations, such as `7`. - -`search.open_contexts`, `so`, `searchOpenContexts`:: -Open search contexts, such as `0`. - -`search.query_current`, `sqc`, `searchQueryCurrent`:: -Current query phase operations, such as `0`. - -`search.query_time`, `sqti`, `searchQueryTime`:: -Time spent in query phase, such as `43ms`. - -`search.query_total`, `sqto`, `searchQueryTotal`:: -Number of query operations, such as `9`. - -`search.scroll_current`, `scc`, `searchScrollCurrent`:: -Open scroll contexts, such as `2`. - -`search.scroll_time`, `scti`, `searchScrollTime`:: -Time scroll contexts held open, such as `2m`. - -`search.scroll_total`, `scto`, `searchScrollTotal`:: -Completed scroll contexts, such as `1`. - -`segments.count`, `sc`, `segmentsCount`:: -Number of segments, such as `4`. - -`segments.memory`, `sm`, `segmentsMemory`:: -Memory used by segments, such as `1.4kb`. - -`segments.index_writer_memory`, `siwm`, `segmentsIndexWriterMemory`:: -Memory used by index writer, such as `18mb`. - -`segments.version_map_memory`, `svmm`, `segmentsVersionMapMemory`:: -Memory used by version map, such as `1.0kb`. - -`segments.fixed_bitset_memory`, `sfbm`, `fixedBitsetMemory`:: -Memory used by fixed bit sets for nested object field types and type filters for types referred in <> fields, such as `1.0kb`. - -`seq_no.global_checkpoint`, `sqg`, `globalCheckpoint`:: -Global checkpoint. - -`seq_no.local_checkpoint`, `sql`, `localCheckpoint`:: -Local checkpoint. - -`seq_no.max`, `sqm`, `maxSeqNo`:: -Maximum sequence number. - -`sparse_vector.value_count`, `svc`, `sparseVectorCount`:: -Number of indexed <>. - -`suggest.current`, `suc`, `suggestCurrent`:: -Number of current suggest operations, such as `0`. - -`suggest.time`, `suti`, `suggestTime`:: -Time spent in suggest, such as `0`. - -`suggest.total`, `suto`, `suggestTotal`:: -Number of suggest operations, such as `0`. - -`unassigned.at`, `ua`:: -Time at which the shard became unassigned in -{wikipedia}/List_of_UTC_time_offsets[Coordinated Universal Time (UTC)]. - -`unassigned.details`, `ud`:: -Details about why the shard became unassigned. -This does not explain why the shard is currently unassigned. -To understand why a shard is not assigned, use the <> API. - -`unassigned.for`, `uf`:: -Time at which the shard was requested to be unassigned in -{wikipedia}/List_of_UTC_time_offsets[Coordinated Universal Time (UTC)]. - -[[reason-unassigned]] -`unassigned.reason`, `ur`:: -Indicates the reason for the last change to the state of this unassigned shard. -This does not explain why the shard is currently unassigned. -To understand why a shard is not assigned, use the <> API. -Returned values include: -+ -* `ALLOCATION_FAILED`: Unassigned as a result of a failed allocation of the shard. -* `CLUSTER_RECOVERED`: Unassigned as a result of a full cluster recovery. -* `DANGLING_INDEX_IMPORTED`: Unassigned as a result of importing a dangling index. 
-* `EXISTING_INDEX_RESTORED`: Unassigned as a result of restoring into a closed index. -* `FORCED_EMPTY_PRIMARY`: The shard's allocation was last modified by forcing an empty primary using the <> API. -* `INDEX_CLOSED`: Unassigned because the index was closed. -* `INDEX_CREATED`: Unassigned as a result of an API creation of an index. -* `INDEX_REOPENED`: Unassigned as a result of opening a closed index. -* `MANUAL_ALLOCATION`: The shard's allocation was last modified by the <> API. -* `NEW_INDEX_RESTORED`: Unassigned as a result of restoring into a new index. -* `NODE_LEFT`: Unassigned as a result of the node hosting it leaving the cluster. -* `NODE_RESTARTING`: Similar to `NODE_LEFT`, except that the node was registered as restarting using the <>. -* `PRIMARY_FAILED`: The shard was initializing as a replica, but the primary shard failed before the initialization completed. -* `REALLOCATED_REPLICA`: A better replica location is identified and causes the existing replica allocation to be cancelled. -* `REINITIALIZED`: When a shard moves from started back to initializing. -* `REPLICA_ADDED`: Unassigned as a result of explicit addition of a replica. -* `REROUTE_CANCELLED`: Unassigned as a result of explicit cancel reroute command. - --- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - -[[cat-shards-api-example]] -==== {api-examples-title} - -[[cat-shards-api-example-single]] -===== Example with a single data stream or index - -[source,console] ---------------------------------------------------------------------------- -GET _cat/shards ---------------------------------------------------------------------------- -// TEST[setup:my_index] - -The API returns the following response: - -[source,txt] ---------------------------------------------------------------------------- -my-index-000001 0 p STARTED 3014 31.1mb 192.168.56.10 H5dfFeA ---------------------------------------------------------------------------- -// TESTRESPONSE[s/3014/\\d+/] -// TESTRESPONSE[s/31.1mb/\\d+(\.\\d+)?[kmg]?b/] -// TESTRESPONSE[s/192.168.56.10/.*/] -// TESTRESPONSE[s/H5dfFeA/node-0/ non_json] - -[[cat-shards-api-example-wildcard]] -===== Example with a wildcard pattern - -If your cluster has many shards, you can use a wildcard pattern in the -`` path parameter to limit the API request. - -The following request returns information for any data streams or indices beginning with `my-index-`. 
-
-[source,console]
----------------------------------------------------------------------------
-GET _cat/shards/my-index-*
----------------------------------------------------------------------------
-// TEST[setup:my_index]
-
-The API returns the following response:
-
-[source,txt]
----------------------------------------------------------------------------
-my-index-000001 0 p STARTED 3014 31.1mb 192.168.56.10 H5dfFeA
----------------------------------------------------------------------------
-// TESTRESPONSE[s/3014/\\d+/]
-// TESTRESPONSE[s/31.1mb/\\d+(\.\\d+)?[kmg]?b/]
-// TESTRESPONSE[s/192.168.56.10/.*/]
-// TESTRESPONSE[s/H5dfFeA/node-0/ non_json]
-
-
-[[relocation]]
-===== Example with a relocating shard
-
-[source,console]
----------------------------------------------------------------------------
-GET _cat/shards
----------------------------------------------------------------------------
-// TEST[skip:for now, relocation cannot be recreated]
-
-The API returns the following response:
-
-[source,txt]
----------------------------------------------------------------------------
-my-index-000001 0 p RELOCATING 3014 31.1mb 192.168.56.10 H5dfFeA -> -> 192.168.56.30 bGG90GE
----------------------------------------------------------------------------
-// TESTRESPONSE[non_json]
-
-The `RELOCATING` value in the `state` column indicates that the index shard is relocating.
-
-[[states]]
-===== Example with shard states
-
-Before a shard is available for use, it goes through an `INITIALIZING` state.
-You can use the cat shards API to see which shards are initializing.
-
-[source,console]
----------------------------------------------------------------------------
-GET _cat/shards
----------------------------------------------------------------------------
-// TEST[skip:there is no guarantee to test for shards in initializing state]
-
-The API returns the following response:
-
-[source,txt]
----------------------------------------------------------------------------
-my-index-000001 0 p STARTED      3014 31.1mb 192.168.56.10 H5dfFeA
-my-index-000001 0 r INITIALIZING    0 14.3mb 192.168.56.30 bGG90GE
----------------------------------------------------------------------------
-// TESTRESPONSE[non_json]
-
-===== Example with reasons for unassigned shards
-
-The following request returns the `unassigned.reason` column, which indicates why a shard is unassigned.
- -[source,console] ---------------------------------------------------------------------------- -GET _cat/shards?h=index,shard,prirep,state,unassigned.reason ---------------------------------------------------------------------------- -// TEST[skip:for now] - -The API returns the following response: - -[source,txt] ---------------------------------------------------------------------------- -my-index-000001 0 p STARTED 3014 31.1mb 192.168.56.10 H5dfFeA -my-index-000001 0 r STARTED 3014 31.1mb 192.168.56.30 bGG90GE -my-index-000001 0 r STARTED 3014 31.1mb 192.168.56.20 I8hydUG -my-index-000001 0 r UNASSIGNED ALLOCATION_FAILED ---------------------------------------------------------------------------- -// TESTRESPONSE[non_json] diff --git a/docs/reference/cat/snapshots.asciidoc b/docs/reference/cat/snapshots.asciidoc deleted file mode 100644 index 1da739b20272f..0000000000000 --- a/docs/reference/cat/snapshots.asciidoc +++ /dev/null @@ -1,155 +0,0 @@ -[[cat-snapshots]] -=== cat snapshots API -++++ -cat snapshots -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns information about the <> stored in one or -more repositories. A snapshot is a backup of an index or running {es} cluster. - - -[[cat-snapshots-api-request]] -==== {api-request-title} - -`GET /_cat/snapshots/` - -`GET /_cat/snapshots` - -[[cat-snapshots-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the -`monitor_snapshot`, `create_snapshot`, or `manage` -<> to use this API. - - -[[cat-snapshots-path-params]] -==== {api-path-parms-title} - -``:: -+ --- -(Optional, string) Comma-separated list of snapshot repositories used to limit -the request. Accepts wildcard expressions. `_all` returns all repositories. - -If any repository fails during the request, {es} returns an error. --- - - -[[cat-snapshots-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] -+ --- -If you do not specify which columns to include, the API returns the default -columns in the order listed below. If you explicitly specify one or more -columns, it only returns the specified columns. - -Valid columns are: - -`id`, `snapshot`:: -(Default) ID of the snapshot, such as `snap1`. - -`repository`, `re`, `repo`:: -(Default) Name of the repository, such as `repo1`. - -`status`, `s`:: -(Default) State of the snapshot process. Returned values are: -+ -* `FAILED`: The snapshot process failed. -* `INCOMPATIBLE`: The snapshot process is incompatible with the current cluster -version. -* `IN_PROGRESS`: The snapshot process started but has not completed. -* `PARTIAL`: The snapshot process completed with a partial success. -* `SUCCESS`: The snapshot process completed with a full success. - -`start_epoch`, `ste`, `startEpoch`:: -(Default) {wikipedia}/Unix_time[Unix `epoch` time] at which -the snapshot process started. - -`start_time`, `sti`, `startTime`:: -(Default) `HH:MM:SS` time at which the snapshot process started. - -`end_epoch`, `ete`, `endEpoch`:: -(Default) {wikipedia}/Unix_time[Unix `epoch` time] at which -the snapshot process ended. 
- -`end_time`, `eti`, `endTime`:: -(Default) `HH:MM:SS` time at which the snapshot process ended. - -`duration`, `dur`:: -(Default) Time it took the snapshot process to complete in <>. - -`indices`, `i`:: -(Default) Number of indices in the snapshot. - -`successful_shards`, `ss`:: -(Default) Number of successful shards in the snapshot. - -`failed_shards`, `fs`:: -(Default) Number of failed shards in the snapshot. - -`total_shards`, `ts`:: -(Default) Total number of shards in the snapshot. - -`reason`, `r`:: -Reason for any snapshot failures. --- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -`ignore_unavailable`:: -(Optional, Boolean) If `true`, the response does not include information from -unavailable snapshots. Defaults to `false`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-snapshots-api-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET /_cat/snapshots/repo1?v=true&s=id --------------------------------------------------- -// TEST[s/^/PUT \/_snapshot\/repo1\/snap1?wait_for_completion=true\n/] -// TEST[s/^/PUT \/_snapshot\/repo1\/snap2?wait_for_completion=true\n/] -// TEST[s/^/PUT \/_snapshot\/repo1\n{"type": "fs", "settings": {"location": "repo\/1"}}\n/] - -The API returns the following response: - -[source,txt] --------------------------------------------------- -id repository status start_epoch start_time end_epoch end_time duration indices successful_shards failed_shards total_shards -snap1 repo1 FAILED 1445616705 18:11:45 1445616978 18:16:18 4.6m 1 4 1 5 -snap2 repo1 SUCCESS 1445634298 23:04:58 1445634672 23:11:12 6.2m 2 10 0 10 --------------------------------------------------- -// TESTRESPONSE[s/FAILED/SUCCESS/ s/14456\d+/\\d+/ s/\d+(\.\d+)?(m|s|ms)/\\d+(\\.\\d+)?(m|s|ms)/] -// TESTRESPONSE[s/\d+:\d+:\d+/\\d+:\\d+:\\d+/] -// TESTRESPONSE[s/1 4 1 5/\\d+ \\d+ \\d+ \\d+/] -// TESTRESPONSE[s/2 10 0 10/\\d+ \\d+ \\d+ \\d+/] -// TESTRESPONSE[non_json] - diff --git a/docs/reference/cat/tasks.asciidoc b/docs/reference/cat/tasks.asciidoc deleted file mode 100644 index ff654b30de992..0000000000000 --- a/docs/reference/cat/tasks.asciidoc +++ /dev/null @@ -1,98 +0,0 @@ -[[cat-tasks]] -=== cat task management API -++++ -cat task management -++++ - -beta::["The cat task management API is new and should still be considered a beta feature. The API may change in ways that are not backwards compatible.",{es-issue}51628] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns information about tasks currently executing in the cluster, -similar to the <> API. - - -[[cat-tasks-api-request]] -==== {api-request-title} - -`GET /_cat/tasks` - -[[cat-tasks-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. 
- -[[cat-tasks-api-desc]] -==== {api-description-title} - -The cat task management API returns information -about tasks currently executing -on one or more nodes in the cluster. -It is a more compact view -of the JSON <> API. - - -[[cat-tasks-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=detailed] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -`nodes`:: -(Optional, string) -Comma-separated list of node IDs or names used to limit the response. Supports -wildcard (`*`) expressions. - -`parent_task_id`:: -(Optional, string) -Parent task ID used to limit the response. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-tasks-api-response-codes]] -==== {api-response-codes-title} - -include::{es-ref-dir}/cluster/tasks.asciidoc[tag=tasks-api-404] - - -[[cat-tasks-api-examples]] -==== {api-examples-title} - -[source,console] ----- -GET _cat/tasks?v=true ----- -// TEST[skip:No tasks to retrieve] - -The API returns the following response: - -[source,console-result] ----- -action task_id parent_task_id type start_time timestamp running_time ip node -cluster:monitor/tasks/lists[n] oTUltX4IQMOUUVeiohTt8A:124 oTUltX4IQMOUUVeiohTt8A:123 direct 1458585884904 01:48:24 44.1micros 127.0.0.1:9300 oTUltX4IQMOUUVeiohTt8A -cluster:monitor/tasks/lists oTUltX4IQMOUUVeiohTt8A:123 - transport 1458585884904 01:48:24 186.2micros 127.0.0.1:9300 oTUltX4IQMOUUVeiohTt8A ----- -// TESTRESPONSE[skip:No tasks to retrieve] -// TESTRESPONSE[non_json] diff --git a/docs/reference/cat/templates.asciidoc b/docs/reference/cat/templates.asciidoc deleted file mode 100644 index 78ff60c663d2f..0000000000000 --- a/docs/reference/cat/templates.asciidoc +++ /dev/null @@ -1,93 +0,0 @@ -[[cat-templates]] -=== cat templates API -++++ -cat templates -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns information about <> in a cluster. -You can use index templates to apply <> -and <> to new indices at creation. - - -[[cat-templates-api-request]] -==== {api-request-title} - -`GET /_cat/templates/` - -`GET /_cat/templates` - -[[cat-templates-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cat-templates-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) The name of the template to return. Accepts wildcard -expressions. If omitted, all templates are returned. 
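-
-For illustration only, a minimal sketch of the no-name form: omitting the
-template name returns every template in the cluster, and the `v` and `s`
-query parameters documented below can be combined with it in the same way as
-in the example at the end of this page.
-
-[source,console]
-----
-GET _cat/templates?v=true&s=name
-----
-// TEST[skip:illustrative sketch only]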
- -[[cat-templates-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=local] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-templates-api-example]] -==== {api-examples-title} - -[source,console] ----- -GET _cat/templates/my-template-*?v=true&s=name ----- -// TEST[s/^/PUT _index_template\/my-template-0\n{"index_patterns": "te*", "priority": 500}\n/] -// TEST[s/^/PUT _index_template\/my-template-1\n{"index_patterns": "tea*", "priority": 501}\n/] -// TEST[s/^/PUT _index_template\/my-template-2\n{"index_patterns": "teak*", "priority": 502, "version": 7}\n/] - -The API returns the following response: - -[source,txt] ----- -name index_patterns order version composed_of -my-template-0 [te*] 500 [] -my-template-1 [tea*] 501 [] -my-template-2 [teak*] 502 7 [] ----- -// TESTRESPONSE[s/\*/\\*/ s/\[/\\[/ s/\]/\\]/ non_json] - -//// -[source,console] ----- -DELETE _index_template/my-template-0 -DELETE _index_template/my-template-1 -DELETE _index_template/my-template-2 ----- -// TEST[continued] -//// diff --git a/docs/reference/cat/thread_pool.asciidoc b/docs/reference/cat/thread_pool.asciidoc deleted file mode 100644 index 1d8517f170aed..0000000000000 --- a/docs/reference/cat/thread_pool.asciidoc +++ /dev/null @@ -1,190 +0,0 @@ -[[cat-thread-pool]] -=== cat thread pool API -++++ -cat thread pool -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns thread pool statistics for each node in a cluster. Returned information -includes all <> and custom thread -pools. - - -[[cat-thread-pool-api-request]] -==== {api-request-title} - -`GET /_cat/thread_pool/` - -`GET /_cat/thread_pool` - -[[cat-thread-pool-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cat-thread-pool-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Comma-separated list of thread pool names used to limit the -request. Accepts wildcard expressions. - - -[[cat-thread-pool-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] -+ --- -If you do not specify which columns to include, the API returns the default -columns in the order listed below. If you explicitly specify one or more -columns, it only returns the specified columns. - -Valid columns are: - -`node_name`:: -(Default) Node name, such as `I8hydUG`. - -`name`:: -(Default) Name of the thread pool, such as `analyze` or `generic`. - -`active`, `a`:: -(Default) Number of active threads in the current thread pool. - -`queue`,`q`:: -(Default) Number of tasks in the queue for the current thread pool. - -`rejected`, `r`:: -(Default) Number of tasks rejected by the thread pool executor. 
- -`completed`, `c`:: -Number of tasks completed by the thread pool executor. - -`core`, `cr`:: -Configured core number of active threads allowed in the current thread pool. - -`ephemeral_id`,`eid`:: -Ephemeral node ID. - -`host`, `h`:: -Hostname for the current node. - -`ip`, `i`:: -IP address for the current node. - -`keep_alive`, `k`:: -Configured keep alive time for threads. - -`largest`, `l`:: -Highest number of active threads in the current thread pool. - -`max`, `mx`:: -Configured maximum number of active threads allowed in the current thread pool. - -`node_id`, `id`:: -ID of the node, such as `k0zy`. - -`pid`, `p`:: -Process ID of the running node. - -`pool_size`, `psz`:: -Number of threads in the current thread pool. - -`port`, `po`:: -Bound transport port for the current node. - -`queue_size`, `qs`:: -Maximum number of tasks permitted in the queue for the current thread pool. - -`size`, `sz`:: -Configured fixed number of active threads allowed in the current thread pool. - -`type`, `t`:: -Type of thread pool. Returned values are `fixed`, `fixed_auto_queue_size`, `direct`, or `scaling`. - --- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=local] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-thread-pool-api-example]] -==== {api-examples-title} - -[[cat-thread-pool-api-ex-default]] -===== Example with default columns - -[source,console] --------------------------------------------------- -GET /_cat/thread_pool --------------------------------------------------- - -The API returns the following response: - -[source,txt] --------------------------------------------------- -node-0 analyze 0 0 0 -... -node-0 fetch_shard_started 0 0 0 -node-0 fetch_shard_store 0 0 0 -node-0 flush 0 0 0 -... -node-0 write 0 0 0 --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./(node-0 \\S+ 0 0 0\n)*/] -// TESTRESPONSE[s/\d+/\\d+/ non_json] -// The substitutions do two things: -// 1. Expect any number of extra thread pools. This allows us to only list a -// few thread pools. The list would be super long otherwise. In addition, -// if xpack is installed then the list will contain more thread pools and -// this way we don't have to assert about them. -// 2. Expect any number of active, queued, or rejected items. We really don't -// know how many there will be and we just want to assert that there are -// numbers in the response, not *which* numbers are there. - - -[[cat-thread-pool-api-ex-headings]] -===== Example with explicit columns - -The following API request returns the `id`, `name`, `active`, `rejected`, and -`completed` columns. The request limits returned information to the `generic` -thread pool. 
- -[source,console] --------------------------------------------------- -GET /_cat/thread_pool/generic?v=true&h=id,name,active,rejected,completed --------------------------------------------------- - -The API returns the following response: - -[source,txt] --------------------------------------------------- -id name active rejected completed -0EWUhXeBQtaVGlexUeVwMg generic 0 0 70 --------------------------------------------------- -// TESTRESPONSE[s/0EWUhXeBQtaVGlexUeVwMg/[\\w-]+/ s/\d+/\\d+/ non_json] - diff --git a/docs/reference/cat/trainedmodel.asciidoc b/docs/reference/cat/trainedmodel.asciidoc deleted file mode 100644 index 378238323f50b..0000000000000 --- a/docs/reference/cat/trainedmodel.asciidoc +++ /dev/null @@ -1,137 +0,0 @@ -[role="xpack"] -[[cat-trained-model]] -=== cat trained model API -++++ -cat trained model -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns configuration and usage information about {ml-docs}/ml-nlp-deploy-models.html[{infer} trained models]. - - -[[cat-trained-model-request]] -==== {api-request-title} - -`GET /_cat/ml/trained_models` - - -[[cat-trained-model-prereqs]] -==== {api-prereq-title} - -If the {es} {security-features} are enabled, you must have the following -privileges: - -* cluster: `monitor_ml` - -For more information, see <> and {ml-docs-setup-privileges}. - - -//// -[[cat-trained-model-desc]] -==== {api-description-title} -//// - - -[[cat-trained-model-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bytes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] -+ -If you do not specify which columns to include, the API returns the default -columns. If you explicitly specify one or more columns, it returns only the -specified columns. -+ -Valid columns are: - -`create_time`, `ct`::: -The time when the trained model was created. - -`created_by`, `c`, `createdBy`::: -Information on the creator of the trained model. - -`data_frame_analytics_id`, `df`, `dataFrameAnalytics`::: -Identifier for the {dfanalytics-job} that created the model. Only displayed if -it is still available. - -`description`, `d`::: -The description of the trained model. - -`heap_size`, `hs`, `modelHeapSize`::: -(Default) -The estimated heap size to keep the trained model in memory. - -`id`::: -(Default) -Identifier for the trained model. - -`ingest.count`, `ic`, `ingestCount`::: -The total number of documents that are processed by the model. - -`ingest.current`, `icurr`, `ingestCurrent`::: -The total number of document that are currently being handled by the trained -model. - -`ingest.failed`, `if`, `ingestFailed`::: -The total number of failed ingest attempts with the trained model. - -`ingest.pipelines`, `ip`, `ingestPipelines`::: -(Default) -The total number of ingest pipelines that are referencing the trained model. - -`ingest.time`, `it`, `ingestTime`::: -The total time that is spent processing documents with the trained model. - -`license`, `l`::: -The license level of the trained model. - -`operations`, `o`, `modelOperations`::: -(Default) -The estimated number of operations to use the trained model. 
This number helps -measuring the computational complexity of the model. - -`version`, `v`::: -The {es} version number in which the trained model was created. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - - -[[cat-trained-model-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET _cat/ml/trained_models?h=c,o,l,ct,v&v=true --------------------------------------------------- -// TEST[skip:kibana sample data] - - -[source,console-result] ----- -id created_by operations license create_time version -ddddd-1580216177138 _xpack 196 PLATINUM 2020-01-28T12:56:17.138Z 8.0.0 -flight-regress-1580215685537 _xpack 102 PLATINUM 2020-01-28T12:48:05.537Z 8.0.0 -lang_ident_model_1 _xpack 39629 BASIC 2019-12-05T12:28:34.594Z 7.6.0 ----- -// TESTRESPONSE[skip:kibana sample data] diff --git a/docs/reference/cat/transforms.asciidoc b/docs/reference/cat/transforms.asciidoc deleted file mode 100644 index 8e5becc5fa76d..0000000000000 --- a/docs/reference/cat/transforms.asciidoc +++ /dev/null @@ -1,215 +0,0 @@ -[role="xpack"] -[[cat-transforms]] -=== cat {transforms} API -++++ -cat transforms -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cat[Compact and aligned text (CAT) APIs].. --- - -[IMPORTANT] -==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the <>. -==== - -Returns configuration and usage information about <>. - -[[cat-transforms-api-request]] -==== {api-request-title} - -`GET /_cat/transforms/` + - -`GET /_cat/transforms/_all` + - -`GET /_cat/transforms/*` + - -`GET /_cat/transforms` - -[[cat-transforms-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `monitor_transform` -cluster privileges to use this API. The built-in `transform_user` role has these -privileges. For more information, see <> and -<>. - -//[[cat-transforms-api-desc]] -//==== {api-description-title} - -[[cat-transforms-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=transform-id-wildcard] - -[[cat-transforms-api-query-params]] -==== {api-query-parms-title} - -`allow_no_match`:: -(Optional, Boolean) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=allow-no-match-transforms1] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format] - -`from`:: -(Optional, integer) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=from-transforms] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h] -+ -If you do not specify which columns to include, the API returns the default -columns. If you explicitly specify one or more columns, it returns only the -specified columns. -+ -Valid columns are: - -`changes_last_detection_time`, `cldt`::: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=checkpointing-changes-last-detected-at] - -`checkpoint`, `cp`::: -(Default) -The sequence number for the checkpoint. 
- -`checkpoint_duration_time_exp_avg`, `cdtea`, `checkpointTimeExpAvg`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=exponential-avg-checkpoint-duration-ms] - -`checkpoint_progress`, `c`, `checkpointProgress`::: -(Default) -The progress of the next checkpoint that is currently in progress. - -`create_time`, `ct`, `createTime`::: -The time the {transform} was created. - -`delete_time`, `dtime`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=delete-time-ms] - -`description`, `d`::: -The description of the {transform}. - -`dest_index`, `di`, `destIndex`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=dest-index] - -`documents_deleted`, `docd`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=docs-deleted-transform] - -`documents_indexed`, `doci`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=docs-indexed] - -`docs_per_second`, `dps`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=transform-settings-docs-per-second] - -`documents_processed`, `docp`::: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=docs-processed] - -`frequency`, `f`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=frequency] - -`id`::: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=transform-id] - -`index_failure`, `if`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=index-failures] - -`index_time`, `itime`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=index-time-ms] - -`index_total`, `it`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=index-total] - -`indexed_documents_exp_avg`, `idea`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=exponential-avg-documents-indexed] - -`last_search_time`, `lst`, `lastSearchTime`::: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=checkpointing-last-search-time] - -`max_page_search_size`, `mpsz`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=transform-settings-max-page-search-size] - -`pages_processed`, `pp`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=pages-processed] - -`pipeline`, `p`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=dest-pipeline] - -`processed_documents_exp_avg`, `pdea`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=exponential-avg-documents-processed] - -`processing_time`, `pt`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=processing-time-ms] - -`reason`, `r`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=state-transform-reason] - -`search_failure`, `sf`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=search-failures] - -`search_time`, `stime`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=search-time-ms] - -`search_total`, `st`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=search-total] - -`source_index`, `si`, `sourceIndex`::: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=source-index-transforms] - -`state`, `s`::: -(Default) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=state-transform] - -`transform_type`, `tt`::: -Indicates the type of {transform}: `batch` or `continuous`. - -`trigger_count`, `tc`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=trigger-count] - -`version`, `v`::: -The version of {es} that existed on the node when the {transform} was -created. 
- -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-s] - -`size`:: -(Optional, integer) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=size-transforms] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=time] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-v] - -[[cat-transforms-api-examples]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET /_cat/transforms?v=true&format=json --------------------------------------------------- -// TEST[skip:kibana sample data] - -[source,console-result] ----- -[ - { - "id" : "ecommerce_transform", - "state" : "started", - "checkpoint" : "1", - "documents_processed" : "705", - "checkpoint_progress" : "100.00", - "changes_last_detection_time" : null - } -] ----- -// TESTRESPONSE[skip:kibana sample data] diff --git a/docs/reference/ccr/apis/auto-follow/delete-auto-follow-pattern.asciidoc b/docs/reference/ccr/apis/auto-follow/delete-auto-follow-pattern.asciidoc deleted file mode 100644 index 2ad50d68b923a..0000000000000 --- a/docs/reference/ccr/apis/auto-follow/delete-auto-follow-pattern.asciidoc +++ /dev/null @@ -1,87 +0,0 @@ -[role="xpack"] -[[ccr-delete-auto-follow-pattern]] -=== Delete auto-follow pattern API -++++ -Delete auto-follow pattern -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Delete {ccr} <>. - -[[ccr-delete-auto-follow-pattern-request]] -==== {api-request-title} - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /_ccr/auto_follow/my_auto_follow_pattern -{ - "remote_cluster" : "remote_cluster", - "leader_index_patterns" : - [ - "leader_index" - ], - "follow_index_pattern" : "{{leader_index}}-follower" -} --------------------------------------------------- -// TEST[setup:remote_cluster] -// TESTSETUP - -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_ccr/auto_follow/ --------------------------------------------------- -// TEST[s//my_auto_follow_pattern/] - -[[ccr-delete-auto-follow-pattern-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `manage_ccr` cluster -privileges on the cluster that contains the follower index. For more information, -see <>. - -[[ccr-delete-auto-follow-pattern-desc]] -==== {api-description-title} - -This API deletes a configured collection of -<>. - -[[ccr-delete-auto-follow-pattern-path-parms]] -==== {api-path-parms-title} - -``:: - (Required, string) Specifies the auto-follow pattern collection to delete. 
- -[[ccr-delete-auto-follow-pattern-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[ccr-delete-auto-follow-pattern-examples]] -==== {api-examples-title} - -This example deletes an auto-follow pattern collection named -`my_auto_follow_pattern`: - -[source,console] --------------------------------------------------- -DELETE /_ccr/auto_follow/my_auto_follow_pattern --------------------------------------------------- -// TEST[setup:remote_cluster] - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged" : true -} --------------------------------------------------- diff --git a/docs/reference/ccr/apis/auto-follow/get-auto-follow-pattern.asciidoc b/docs/reference/ccr/apis/auto-follow/get-auto-follow-pattern.asciidoc deleted file mode 100644 index 951185d14e920..0000000000000 --- a/docs/reference/ccr/apis/auto-follow/get-auto-follow-pattern.asciidoc +++ /dev/null @@ -1,122 +0,0 @@ -[role="xpack"] -[[ccr-get-auto-follow-pattern]] -=== Get auto-follow pattern API -++++ -Get auto-follow pattern -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Get {ccr} <>. - -[[ccr-get-auto-follow-pattern-request]] -==== {api-request-title} - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /_ccr/auto_follow/my_auto_follow_pattern -{ - "remote_cluster" : "remote_cluster", - "leader_index_patterns" : - [ - "leader_index*" - ], - "leader_index_exclusion_patterns": - [ - "leader_index_001" - ], - "follow_index_pattern" : "{{leader_index}}-follower" -} --------------------------------------------------- -// TEST[setup:remote_cluster] -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE /_ccr/auto_follow/my_auto_follow_pattern --------------------------------------------------- -// TEST -// TEARDOWN - -////////////////////////// - -[source,console] --------------------------------------------------- -GET /_ccr/auto_follow/ --------------------------------------------------- - -[source,console] --------------------------------------------------- -GET /_ccr/auto_follow/ --------------------------------------------------- -// TEST[s//my_auto_follow_pattern/] - -[[ccr-get-auto-follow-pattern-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `manage_ccr` cluster -privileges on the cluster that contains the follower index. For more information, -see <>. - -[[ccr-get-auto-follow-pattern-desc]] -==== {api-description-title} - -This API gets configured <>. -This API will return the specified auto-follow pattern collection. - -[[ccr-get-auto-follow-pattern-path-parms]] -==== {api-path-parms-title} - -``:: - (Optional, string) Specifies the auto-follow pattern collection that you want - to retrieve. If you do not specify a name, the API returns information for all - collections. 
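-
-For illustration only, a minimal sketch of the no-name form shown in the
-request section above, which lists every configured collection:
-
-[source,console]
---------------------------------------------------
-GET /_ccr/auto_follow
---------------------------------------------------
-// TEST[skip:illustrative sketch only]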
- -[[ccr-get-auto-follow-pattern-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[ccr-get-auto-follow-pattern-examples]] -==== {api-examples-title} - -This example retrieves information about an auto-follow pattern collection -named `my_auto_follow_pattern`: - -[source,console] --------------------------------------------------- -GET /_ccr/auto_follow/my_auto_follow_pattern --------------------------------------------------- -// TEST[setup:remote_cluster] - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "patterns": [ - { - "name": "my_auto_follow_pattern", - "pattern": { - "active": true, - "remote_cluster" : "remote_cluster", - "leader_index_patterns" : - [ - "leader_index*" - ], - "leader_index_exclusion_patterns": - [ - "leader_index_001" - ], - "follow_index_pattern" : "{{leader_index}}-follower" - } - } - ] -} --------------------------------------------------- diff --git a/docs/reference/ccr/apis/auto-follow/pause-auto-follow-pattern.asciidoc b/docs/reference/ccr/apis/auto-follow/pause-auto-follow-pattern.asciidoc deleted file mode 100644 index 462ee213ed4e4..0000000000000 --- a/docs/reference/ccr/apis/auto-follow/pause-auto-follow-pattern.asciidoc +++ /dev/null @@ -1,97 +0,0 @@ -[role="xpack"] -[[ccr-pause-auto-follow-pattern]] -=== Pause auto-follow pattern API -++++ -Pause auto-follow pattern -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Pauses a {ccr} <>. - -[[ccr-pause-auto-follow-pattern-request]] -==== {api-request-title} - -`POST /_ccr/auto_follow//pause` - -[[ccr-pause-auto-follow-pattern-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `manage_ccr` cluster -privileges on the cluster that contains the follower index. For more information, -see <>. - -[[ccr-pause-auto-follow-pattern-desc]] -==== {api-description-title} - -This API pauses an <>. When this API returns, the auto-follow pattern -is inactive and ignores any new index created on the remote cluster that matches any of -the auto-follow's patterns. Paused auto-follow patterns appear with the `active` field -set to `false` in the <>. - -You can resume auto-following with the <>. -Once resumed, the auto-follow pattern is active again and automatically configure -follower indices for newly created indices on the remote cluster that match its patterns. -Remote indices created while the -pattern was paused will also be followed, unless they have been deleted or closed in the -meantime. - -[[ccr-pause-auto-follow-pattern-path-parms]] -==== {api-path-parms-title} - -``:: - (Required, string) Name of the auto-follow pattern to pause. 
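-
-After pausing, the pattern's state can be checked with the get auto-follow
-pattern API described earlier; for a paused pattern the `active` field in that
-response is expected to be `false`. A minimal sketch, reusing the placeholder
-pattern name from the examples below:
-
-[source,console]
---------------------------------------------------
-GET /_ccr/auto_follow/my_auto_follow_pattern
---------------------------------------------------
-// TEST[skip:illustrative sketch only]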
- -[[ccr-pause-auto-follow-pattern-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[ccr-pause-auto-follow-pattern-examples]] -==== {api-examples-title} - -This example pauses an auto-follow pattern named `my_auto_follow_pattern`: -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /_ccr/auto_follow/my_auto_follow_pattern -{ - "remote_cluster" : "remote_cluster", - "leader_index_patterns" : - [ - "leader_index" - ], - "follow_index_pattern" : "{{leader_index}}-follower" -} --------------------------------------------------- -// TEST[setup:remote_cluster] -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE /_ccr/auto_follow/my_auto_follow_pattern --------------------------------------------------- -// TEST -// TEARDOWN - -////////////////////////// - -[source,console] --------------------------------------------------- -POST /_ccr/auto_follow/my_auto_follow_pattern/pause --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged" : true -} --------------------------------------------------- diff --git a/docs/reference/ccr/apis/auto-follow/put-auto-follow-pattern.asciidoc b/docs/reference/ccr/apis/auto-follow/put-auto-follow-pattern.asciidoc deleted file mode 100644 index 672a11302fdd5..0000000000000 --- a/docs/reference/ccr/apis/auto-follow/put-auto-follow-pattern.asciidoc +++ /dev/null @@ -1,162 +0,0 @@ -[role="xpack"] -[[ccr-put-auto-follow-pattern]] -=== Create auto-follow pattern API -++++ -Create auto-follow pattern -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Creates a {ccr} <>. - -[[ccr-put-auto-follow-pattern-request]] -==== {api-request-title} - -[source,console] --------------------------------------------------- -PUT /_ccr/auto_follow/ -{ - "remote_cluster" : "", - "leader_index_patterns" : - [ - "" - ], - "leader_index_exclusion_patterns": - [ - "" - ], - "follow_index_pattern" : "" -} --------------------------------------------------- -// TEST[setup:remote_cluster] -// TEST[s//auto_follow_pattern_name/] -// TEST[s//remote_cluster/] -// TEST[s//leader_index*/] -// TEST[s///] -// TEST[s//{{leader_index}}-follower/] - -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_ccr/auto_follow/auto_follow_pattern_name --------------------------------------------------- -// TEST[continued] - -////////////////////////// - -[[ccr-put-auto-follow-pattern-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `read` and `monitor` -index privileges for the leader index patterns. You must also have `manage_ccr` -cluster privileges on the cluster that contains the follower index. For more -information, see <>. - -[[ccr-put-auto-follow-pattern-desc]] -==== {api-description-title} - -This API creates a new named collection of -<> against the remote cluster -specified in the request body. Newly created indices on the remote cluster -matching any of the specified patterns will be automatically configured as follower -indices. Indices on the remote cluster that were created before the auto-follow -pattern is created won't be auto-followed even if they match the pattern. 
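-
-Such pre-existing indices can still be replicated manually if needed, for
-example with the create follower API. A minimal sketch, reusing the
-placeholder remote cluster and index names from the examples in these pages:
-
-[source,console]
---------------------------------------------------
-PUT /leader_index-follower/_ccr/follow?wait_for_active_shards=1
-{
-  "remote_cluster" : "remote_cluster",
-  "leader_index" : "leader_index"
-}
---------------------------------------------------
-// TEST[skip:illustrative sketch only]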
- -This API can also be used to update existing -<>. Note that follower indices that were configured automatically -before updating an auto-follow pattern will remain unchanged even if they don't match against -the new patterns. - -[[ccr-put-auto-follow-pattern-path-parms]] -==== {api-path-parms-title} -``:: - (Required, string) The name of the collection of auto-follow patterns. - -[[ccr-put-auto-follow-pattern-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[ccr-put-auto-follow-pattern-request-body]] -==== {api-request-body-title} - -`remote_cluster`:: - (Required, string) The <> containing - the leader indices to match against. - -`leader_index_patterns`:: - (Optional, array) An array of simple index patterns to match against indices - in the remote cluster specified by the `remote_cluster` field. - -`leader_index_exclusion_patterns`:: - (Optional, array) An array of simple index patterns that can be used to exclude indices - from being auto-followed. Indices in the remote cluster whose names are matching one or - more `leader_index_patterns` and one or more `leader_index_exclusion_patterns` won't be followed. - -`follow_index_pattern`:: - (Optional, string) The name of follower index. The template `{{leader_index}}` can be used to - derive the name of the follower index from the name of the leader index. When following a data - stream, the `follow_index_pattern` will be used for renaming not only the leader index, but also - the data stream containing the leader index. For example, a data stream called - `logs-mysql-default` with a backing index of `.ds-logs-mysql-default-2022-01-01-000001` and a - `follow_index_pattern` of `{{leader_index}}_copy` will replicate the data stream as - `logs-mysql-default_copy` and the backing index as - `.ds-logs-mysql-default_copy-2022-01-01-000001`. 
- -include::../follow-request-body.asciidoc[] - -[[ccr-put-auto-follow-pattern-examples]] -==== {api-examples-title} - -This example creates an auto-follow pattern named `my_auto_follow_pattern`: - -[source,console] --------------------------------------------------- -PUT /_ccr/auto_follow/my_auto_follow_pattern -{ - "remote_cluster" : "remote_cluster", - "leader_index_patterns" : - [ - "leader_index*" - ], - "follow_index_pattern" : "{{leader_index}}-follower", - "settings": { - "index.number_of_replicas": 0 - }, - "max_read_request_operation_count" : 1024, - "max_outstanding_read_requests" : 16, - "max_read_request_size" : "1024k", - "max_write_request_operation_count" : 32768, - "max_write_request_size" : "16k", - "max_outstanding_write_requests" : 8, - "max_write_buffer_count" : 512, - "max_write_buffer_size" : "512k", - "max_retry_delay" : "10s", - "read_poll_timeout" : "30s" -} --------------------------------------------------- -// TEST[setup:remote_cluster] - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged" : true -} --------------------------------------------------- - -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_ccr/auto_follow/my_auto_follow_pattern --------------------------------------------------- -// TEST[continued] - -////////////////////////// diff --git a/docs/reference/ccr/apis/auto-follow/resume-auto-follow-pattern.asciidoc b/docs/reference/ccr/apis/auto-follow/resume-auto-follow-pattern.asciidoc deleted file mode 100644 index d97c41d67c1ea..0000000000000 --- a/docs/reference/ccr/apis/auto-follow/resume-auto-follow-pattern.asciidoc +++ /dev/null @@ -1,98 +0,0 @@ -[role="xpack"] -[[ccr-resume-auto-follow-pattern]] -=== Resume auto-follow pattern API -++++ -Resume auto-follow pattern -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Resumes a {ccr} <>. - -[[ccr-resume-auto-follow-pattern-request]] -==== {api-request-title} - -`POST /_ccr/auto_follow//resume` - -[[ccr-resume-auto-follow-pattern-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `manage_ccr` cluster -privileges on the cluster that contains the follower index. For more information, -see <>. - -[[ccr-resume-auto-follow-pattern-desc]] -==== {api-description-title} - -This API resumes an <> that has been paused with the -<>. When this API -returns, the auto-follow pattern will resume configuring following indices for -newly created indices on the remote cluster that match its patterns. Remote -indices created while the pattern was paused will also be followed, unless they -have been deleted or closed in the meantime. - -[[ccr-resume-auto-follow-pattern-path-parms]] -==== {api-path-parms-title} - -``:: - (Required, string) Specifies the name of the auto-follow pattern to resume. 
- -[[ccr-resume-auto-follow-pattern-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[ccr-resume-auto-follow-pattern-examples]] -==== {api-examples-title} - -This example resumes the activity of a paused auto-follow pattern -named `my_auto_follow_pattern`: -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /_ccr/auto_follow/my_auto_follow_pattern -{ - "remote_cluster" : "remote_cluster", - "leader_index_patterns" : - [ - "leader_index" - ], - "follow_index_pattern" : "{{leader_index}}-follower" -} --------------------------------------------------- -// TEST[setup:remote_cluster] -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE /_ccr/auto_follow/my_auto_follow_pattern --------------------------------------------------- -// TEST -// TEARDOWN - -[source,console] --------------------------------------------------- -POST /_ccr/auto_follow/my_auto_follow_pattern/pause --------------------------------------------------- -// TEST - -////////////////////////// -[source,console] --------------------------------------------------- -POST /_ccr/auto_follow/my_auto_follow_pattern/resume --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged" : true -} --------------------------------------------------- diff --git a/docs/reference/ccr/apis/ccr-apis.asciidoc b/docs/reference/ccr/apis/ccr-apis.asciidoc deleted file mode 100644 index 0decb98197d31..0000000000000 --- a/docs/reference/ccr/apis/ccr-apis.asciidoc +++ /dev/null @@ -1,58 +0,0 @@ -[role="xpack"] -[[ccr-apis]] -== {ccr-cap} APIs - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -You can use the following APIs to perform <> operations. - -[discrete] -[[ccr-api-top-level]] -=== Top-Level - -* <> - -[discrete] -[[ccr-api-follow]] -=== Follow - -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -[discrete] -[[ccr-api-auto-follow]] -=== Auto-follow - -* <> -* <> -* <> -* <> -* <> - -// top-level -include::get-ccr-stats.asciidoc[] - -// follow -include::follow/put-follow.asciidoc[] -include::follow/post-pause-follow.asciidoc[] -include::follow/post-resume-follow.asciidoc[] -include::follow/post-unfollow.asciidoc[] -include::follow/post-forget-follower.asciidoc[] -include::follow/get-follow-stats.asciidoc[] -include::follow/get-follow-info.asciidoc[] - -// auto-follow -include::auto-follow/put-auto-follow-pattern.asciidoc[] -include::auto-follow/delete-auto-follow-pattern.asciidoc[] -include::auto-follow/get-auto-follow-pattern.asciidoc[] -include::auto-follow/pause-auto-follow-pattern.asciidoc[] -include::auto-follow/resume-auto-follow-pattern.asciidoc[] diff --git a/docs/reference/ccr/apis/follow-request-body.asciidoc b/docs/reference/ccr/apis/follow-request-body.asciidoc deleted file mode 100644 index caab04cb564ee..0000000000000 --- a/docs/reference/ccr/apis/follow-request-body.asciidoc +++ /dev/null @@ -1,105 +0,0 @@ -`settings`:: - (object) Settings to override from the leader index. Note that certain - settings can not be overrode (e.g., `index.number_of_shards`). - -// tag::ccr-resume-follow-request-body[] -`max_read_request_operation_count`:: - (integer) The maximum number of operations to pull per read from the remote - cluster. 
- -`max_outstanding_read_requests`:: - (long) The maximum number of outstanding reads requests from the remote - cluster. - -`max_read_request_size`:: - (<>) The maximum size in bytes of per read of a batch - of operations pulled from the remote cluster. - -`max_write_request_operation_count`:: - (integer) The maximum number of operations per bulk write request executed on - the follower. - -`max_write_request_size`:: - (<>) The maximum total bytes of operations per bulk write request - executed on the follower. - -`max_outstanding_write_requests`:: - (integer) The maximum number of outstanding write requests on the follower. - -`max_write_buffer_count`:: - (integer) The maximum number of operations that can be queued for writing. - When this limit is reached, reads from the remote cluster will be deferred - until the number of queued operations goes below the limit. - -`max_write_buffer_size`:: - (<>) The maximum total bytes of operations that can be - queued for - writing. When this limit is reached, reads from the remote cluster will be - deferred until the total bytes of queued operations goes below the limit. - -`max_retry_delay`:: - (<>) The maximum time to wait before retrying an - operation that failed exceptionally. An exponential backoff strategy is - employed when retrying. - -`read_poll_timeout`:: - (<>) The maximum time to wait for new operations on the - remote cluster when the follower index is synchronized with the leader index. - When the timeout has elapsed, the poll for operations will return to the - follower so that it can update some statistics. Then the follower will - immediately attempt to read from the leader again. - -===== Default values - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /follower_index/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster" : "remote_cluster", - "leader_index" : "leader_index" -} --------------------------------------------------- -// TESTSETUP -// TEST[setup:remote_cluster_and_leader_index] - -[source,console] --------------------------------------------------- -POST /follower_index/_ccr/pause_follow --------------------------------------------------- -// TEARDOWN - -[source,console] --------------------------------------------------- -GET /follower_index/_ccr/info?filter_path=follower_indices.parameters --------------------------------------------------- - -////////////////////////// - -The following output from the follow info api describes all the default -values for the above described index follow request parameters: - -[source,console-result] --------------------------------------------------- -{ - "follower_indices" : [ - { - "parameters" : { - "max_read_request_operation_count" : 5120, - "max_read_request_size" : "32mb", - "max_outstanding_read_requests" : 12, - "max_write_request_operation_count" : 5120, - "max_write_request_size" : "9223372036854775807b", - "max_outstanding_write_requests" : 9, - "max_write_buffer_count" : 2147483647, - "max_write_buffer_size" : "512mb", - "max_retry_delay" : "500ms", - "read_poll_timeout" : "1m" - } - } - ] -} - --------------------------------------------------- -// end::ccr-resume-follow-request-body[] \ No newline at end of file diff --git a/docs/reference/ccr/apis/follow/get-follow-info.asciidoc b/docs/reference/ccr/apis/follow/get-follow-info.asciidoc deleted file mode 100644 index 4c05faa0a7db8..0000000000000 --- a/docs/reference/ccr/apis/follow/get-follow-info.asciidoc +++ /dev/null @@ -1,209 +0,0 @@ -[role="xpack"] 
-[[ccr-get-follow-info]] -=== Get follower info API -++++ -Get follower info -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Retrieves information about all <> follower indices. - -[[ccr-get-follow-info-request]] -==== {api-request-title} - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /follower_index/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster" : "remote_cluster", - "leader_index" : "leader_index" -} --------------------------------------------------- -// TESTSETUP -// TEST[setup:remote_cluster_and_leader_index] -////////////////////////// - -[source,console] --------------------------------------------------- -GET //_ccr/info --------------------------------------------------- -// TEST[s//follower_index/] - -[[ccr-get-follow-info-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `monitor` cluster -privileges. For more information, see <>. - -[[ccr-get-follow-info-desc]] -==== {api-description-title} - -This API lists the parameters and the status for each follower index. -For example, the results include follower index names, leader index names, -replication options and whether the follower indices are active or paused. - -[[ccr-get-follow-info-path-parms]] -==== {api-path-parms-title} - -``:: - (Required, string) A comma-delimited list of follower index patterns. - -[[ccr-get-follow-info-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[role="child_attributes"] -[[ccr-get-follow-info-response-body]] -==== {api-response-body-title} - -//Begin follower_indices -`follower_indices`:: -(array) An array of follower index statistics. -+ -.Properties of objects in `follower_indices` -[%collapsible%open] -==== -`follower_index`:: -(string) The name of the follower index. - -`leader_index`:: -(string) The name of the index in the leader cluster that is followed. - -//Begin parameters -`parameters`:: -(object) An object that encapsulates {ccr} parameters. If the follower index's `status` is `paused`, -this object is omitted. -+ -.Properties of `parameters` -[%collapsible%open] -===== -`max_outstanding_read_requests`:: -(long) The maximum number of outstanding read requests from the remote cluster. - -`max_outstanding_write_requests`:: -(integer) The maximum number of outstanding write requests on the follower. - -`max_read_request_operation_count`:: -(integer) The maximum number of operations to pull per read from the remote -cluster. - -`max_read_request_size`:: -(<>) The maximum size in bytes of per read of a batch of -operations pulled from the remote cluster. - -`max_retry_delay`:: -(<>) The maximum time to wait before retrying an -operation that failed exceptionally. An exponential backoff strategy is employed -when retrying. - -`max_write_buffer_count`:: -(integer) The maximum number of operations that can be queued for writing. When -this limit is reached, reads from the remote cluster are deferred until the -number of queued operations goes below the limit. - -`max_write_buffer_size`:: -(<>) The maximum total bytes of operations that can be -queued for writing. When this limit is reached, reads from the remote cluster -are deferred until the total bytes of queued operations goes below the limit. 
- -`max_write_request_operation_count`:: -(integer) The maximum number of operations per bulk write request executed on -the follower. - -`max_write_request_size`:: -(<>) The maximum total bytes of operations per bulk write -request executed on the follower. - -`read_poll_timeout`:: -(<>) The maximum time to wait for new operations on the -remote cluster when the follower index is synchronized with the leader index. -When the timeout has elapsed, the poll for operations returns to the follower so -that it can update some statistics, then the follower immediately attempts -to read from the leader again. -===== -//End parameters - -`remote_cluster`:: -(string) The <> that contains the -leader index. - -`status`:: -(string) Whether index following is `active` or `paused`. -==== -//End follower_indices - -[[ccr-get-follow-info-examples]] -==== {api-examples-title} - -This example retrieves follower info: - -[source,console] --------------------------------------------------- -GET /follower_index/_ccr/info --------------------------------------------------- - -If the follower index is `active`, the API returns the following results: - -[source,console-result] --------------------------------------------------- -{ - "follower_indices": [ - { - "follower_index": "follower_index", - "remote_cluster": "remote_cluster", - "leader_index": "leader_index", - "status": "active", - "parameters": { - "max_read_request_operation_count": 5120, - "max_read_request_size": "32mb", - "max_outstanding_read_requests": 12, - "max_write_request_operation_count": 5120, - "max_write_request_size": "9223372036854775807b", - "max_outstanding_write_requests": 9, - "max_write_buffer_count": 2147483647, - "max_write_buffer_size": "512mb", - "max_retry_delay": "500ms", - "read_poll_timeout": "1m" - } - } - ] -} --------------------------------------------------- - -//// -[source,console] --------------------------------------------------- -POST /follower_index/_ccr/pause_follow --------------------------------------------------- -// TEST[continued] - -[source,console] --------------------------------------------------- -GET /follower_index/_ccr/info --------------------------------------------------- -// TEST[continued] -//// - -If the follower index is `paused`, the API returns the following results: - -[source,console-result] --------------------------------------------------- -{ - "follower_indices": [ - { - "follower_index": "follower_index", - "remote_cluster": "remote_cluster", - "leader_index": "leader_index", - "status": "paused" - } - ] -} --------------------------------------------------- diff --git a/docs/reference/ccr/apis/follow/get-follow-stats.asciidoc b/docs/reference/ccr/apis/follow/get-follow-stats.asciidoc deleted file mode 100644 index 29000a98f64b4..0000000000000 --- a/docs/reference/ccr/apis/follow/get-follow-stats.asciidoc +++ /dev/null @@ -1,297 +0,0 @@ -[role="xpack"] -[[ccr-get-follow-stats]] -=== Get follower stats API -++++ -Get follower stats -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Get <> follower stats. 
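Editor's note: as a supplement to the follow info and follow stats pages above, the sketch below shows one way these two endpoints might be polled programmatically, for example to alert when a follower is paused or lagging. It is only a sketch: the base URL, credentials, and index name are placeholder assumptions, and it calls the plain REST paths documented here rather than any particular client library.

[source,python]
----
# Sketch: check follower status and replication lag via the CCR follow info
# and follow stats APIs. Endpoint paths come from the documentation above;
# the base URL, credentials, and index name are placeholder assumptions.
import requests

BASE_URL = "http://localhost:9200"      # assumed follower-cluster endpoint
AUTH = ("elastic", "changeme")          # assumed credentials
FOLLOWER_INDEX = "follower_index"       # assumed follower index name

# GET /<index>/_ccr/info returns the follower parameters and status.
info = requests.get(f"{BASE_URL}/{FOLLOWER_INDEX}/_ccr/info", auth=AUTH).json()
for follower in info["follower_indices"]:
    print(follower["follower_index"], "is", follower["status"])

# GET /<index>/_ccr/stats returns shard-level stats, including
# total_global_checkpoint_lag, an indication of how far the follower lags.
stats = requests.get(f"{BASE_URL}/{FOLLOWER_INDEX}/_ccr/stats", auth=AUTH).json()
for index_stats in stats["indices"]:
    print(index_stats["index"], "global checkpoint lag:",
          index_stats["total_global_checkpoint_lag"])
----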
- -[[ccr-get-follow-stats-request]] -==== {api-request-title} - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /follower_index/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster" : "remote_cluster", - "leader_index" : "leader_index" -} --------------------------------------------------- -// TESTSETUP -// TEST[setup:remote_cluster_and_leader_index] - -[source,console] --------------------------------------------------- -POST /follower_index/_ccr/pause_follow --------------------------------------------------- -// TEARDOWN - -////////////////////////// - -[source,console] --------------------------------------------------- -GET //_ccr/stats --------------------------------------------------- -// TEST[s//follower_index/] - -[[ccr-get-follow-stats-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `monitor` cluster -privileges on the cluster that contains the follower index. For more information, -see <>. - -[[ccr-get-follow-stats-desc]] -==== {api-description-title} - -This API gets follower stats. This API will return shard-level stats about the -following tasks associated with each shard for the specified indices. - -[[ccr-get-follow-stats-path-parms]] -==== {api-path-parms-title} - -``:: - (Required, string) A comma-delimited list of index patterns. - -[[ccr-get-follow-stats-query-params]] -==== {api-query-parms-title} - -`timeout`:: -(Optional, time) Controls the amount of time to wait for results. Defaults to unlimited. - -[role="child_attributes"] -[[ccr-get-follow-stats-response-body]] -==== {api-response-body-title} - -//Begin indices -`indices`:: -(array) An array of follower index statistics. -+ -.Properties of `indices` -[%collapsible%open] -==== -`fatal_exception`:: -(object) An object representing a fatal exception that cancelled the following -task. In this situation, the following task must be resumed manually with the -<>. - -`index`:: -(string) The name of the follower index. - -`total_global_checkpoint_lag`:: -(long) Indication of how much the follower is lagging the leader. This is the sum of the difference between the `leader_global_checkpoint` and the `follower_global_checkpoint` for all shards. - -//Begin shards -`shards`:: -(array) An array of shard-level following task statistics. -+ -.Properties of objects in `shards` -[%collapsible%open] -===== -`bytes_read`:: -(long) The total of transferred bytes read from the leader. -+ --- -NOTE: This is only an estimate and does not account for compression if enabled. - --- - -`failed_read_requests`:: -(long) The number of failed reads. - -`failed_write_requests`:: -(long) The number of failed bulk write requests executed on the follower. - -`follower_aliases_version`:: -(long) The index aliases version the follower is synced up to. - -`follower_global_checkpoint`:: -(long) The current global checkpoint on the follower. The difference between the -`leader_global_checkpoint` and the `follower_global_checkpoint` is an -indication of how much the follower is lagging the leader. - -`follower_index`:: -(string) The name of the follower index. - -`follower_mapping_version`:: -(long) The mapping version the follower is synced up to. - -`follower_max_seq_no`:: -(long) The current maximum sequence number on the follower. - -`follower_settings_version`:: -(long) The index settings version the follower is synced up to. 
- -`last_requested_seq_no`:: -(long) The starting sequence number of the last batch of operations requested -from the leader. - -`leader_global_checkpoint`:: -(long) The current global checkpoint on the leader known to the follower task. - -`leader_index`:: -(string) The name of the index in the leader cluster being followed. - -`leader_max_seq_no`:: -(long) The current maximum sequence number on the leader known to the follower -task. - -`operations_read`:: -(long) The total number of operations read from the leader. - -`operations_written`:: -(long) The number of operations written on the follower. - -`outstanding_read_requests`:: -(integer) The number of active read requests from the follower. - -`outstanding_write_requests`:: -(integer) The number of active bulk write requests on the follower. - -//Begin read_exceptions -`read_exceptions`:: -(array) An array of objects representing failed reads. -+ -.Properties of objects in `read_exceptions` -[%collapsible%open] -====== -`exception`:: -(object) Represents the exception that caused the read to fail. - -`from_seq_no`:: -(long) The starting sequence number of the batch requested from the leader. - -`retries`:: -(integer) The number of times the batch has been retried. -====== -//End read_exceptions - -`remote_cluster`:: -(string) The <> containing the leader -index. - -`shard_id`:: -(integer) The numerical shard ID, with values from 0 to one less than the -number of replicas. - -`successful_read_requests`:: -(long) The number of successful fetches. - -`successful_write_requests`:: -(long) The number of bulk write requests executed on the follower. - -`time_since_last_read_millis`:: -(long) The number of milliseconds since a read request was sent to the leader. -+ -NOTE: When the follower is caught up to the leader, this number will increase up -to the configured `read_poll_timeout` at which point another read request will -be sent to the leader. - -`total_read_remote_exec_time_millis`:: -(long) The total time reads spent executing on the remote cluster. - -`total_read_time_millis`:: -(long) The total time reads were outstanding, measured from the time a read was -sent to the leader to the time a reply was returned to the follower. - -`total_write_time_millis`:: -(long) The total time spent writing on the follower. - -`write_buffer_operation_count`:: -(integer) The number of write operations queued on the follower. - -`write_buffer_size_in_bytes`:: -(long) The total number of bytes of operations currently queued for writing. 
-===== -//End shards -==== -//End indices - -[[ccr-get-follow-stats-examples]] -==== {api-examples-title} - -This example retrieves follower stats: - -[source,console] --------------------------------------------------- -GET /follower_index/_ccr/stats --------------------------------------------------- - -The API returns the following results: - -[source,console-result] --------------------------------------------------- -{ - "indices" : [ - { - "index" : "follower_index", - "total_global_checkpoint_lag" : 256, - "shards" : [ - { - "remote_cluster" : "remote_cluster", - "leader_index" : "leader_index", - "follower_index" : "follower_index", - "shard_id" : 0, - "leader_global_checkpoint" : 1024, - "leader_max_seq_no" : 1536, - "follower_global_checkpoint" : 768, - "follower_max_seq_no" : 896, - "last_requested_seq_no" : 897, - "outstanding_read_requests" : 8, - "outstanding_write_requests" : 2, - "write_buffer_operation_count" : 64, - "follower_mapping_version" : 4, - "follower_settings_version" : 2, - "follower_aliases_version" : 8, - "total_read_time_millis" : 32768, - "total_read_remote_exec_time_millis" : 16384, - "successful_read_requests" : 32, - "failed_read_requests" : 0, - "operations_read" : 896, - "bytes_read" : 32768, - "total_write_time_millis" : 16384, - "write_buffer_size_in_bytes" : 1536, - "successful_write_requests" : 16, - "failed_write_requests" : 0, - "operations_written" : 832, - "read_exceptions" : [ ], - "time_since_last_read_millis" : 8 - } - ] - } - ] -} --------------------------------------------------- -// TESTRESPONSE[s/"total_global_checkpoint_lag" : 256/"total_global_checkpoint_lag" : 0/] -// TESTRESPONSE[s/"leader_global_checkpoint" : 1024/"leader_global_checkpoint" : $body.indices.0.shards.0.leader_global_checkpoint/] -// TESTRESPONSE[s/"leader_max_seq_no" : 1536/"leader_max_seq_no" : $body.indices.0.shards.0.leader_max_seq_no/] -// TESTRESPONSE[s/"follower_global_checkpoint" : 768/"follower_global_checkpoint" : $body.indices.0.shards.0.follower_global_checkpoint/] -// TESTRESPONSE[s/"follower_max_seq_no" : 896/"follower_max_seq_no" : $body.indices.0.shards.0.follower_max_seq_no/] -// TESTRESPONSE[s/"last_requested_seq_no" : 897/"last_requested_seq_no" : $body.indices.0.shards.0.last_requested_seq_no/] -// TESTRESPONSE[s/"outstanding_read_requests" : 8/"outstanding_read_requests" : $body.indices.0.shards.0.outstanding_read_requests/] -// TESTRESPONSE[s/"outstanding_write_requests" : 2/"outstanding_write_requests" : $body.indices.0.shards.0.outstanding_write_requests/] -// TESTRESPONSE[s/"write_buffer_operation_count" : 64/"write_buffer_operation_count" : $body.indices.0.shards.0.write_buffer_operation_count/] -// TESTRESPONSE[s/"follower_mapping_version" : 4/"follower_mapping_version" : $body.indices.0.shards.0.follower_mapping_version/] -// TESTRESPONSE[s/"follower_settings_version" : 2/"follower_settings_version" : $body.indices.0.shards.0.follower_settings_version/] -// TESTRESPONSE[s/"follower_aliases_version" : 8/"follower_aliases_version" : $body.indices.0.shards.0.follower_aliases_version/] -// TESTRESPONSE[s/"total_read_time_millis" : 32768/"total_read_time_millis" : $body.indices.0.shards.0.total_read_time_millis/] -// TESTRESPONSE[s/"total_read_remote_exec_time_millis" : 16384/"total_read_remote_exec_time_millis" : $body.indices.0.shards.0.total_read_remote_exec_time_millis/] -// TESTRESPONSE[s/"successful_read_requests" : 32/"successful_read_requests" : $body.indices.0.shards.0.successful_read_requests/] -// TESTRESPONSE[s/"failed_read_requests" 
: 0/"failed_read_requests" : $body.indices.0.shards.0.failed_read_requests/] -// TESTRESPONSE[s/"operations_read" : 896/"operations_read" : $body.indices.0.shards.0.operations_read/] -// TESTRESPONSE[s/"bytes_read" : 32768/"bytes_read" : $body.indices.0.shards.0.bytes_read/] -// TESTRESPONSE[s/"total_write_time_millis" : 16384/"total_write_time_millis" : $body.indices.0.shards.0.total_write_time_millis/] -// TESTRESPONSE[s/"write_buffer_size_in_bytes" : 1536/"write_buffer_size_in_bytes" : $body.indices.0.shards.0.write_buffer_size_in_bytes/] -// TESTRESPONSE[s/"successful_write_requests" : 16/"successful_write_requests" : $body.indices.0.shards.0.successful_write_requests/] -// TESTRESPONSE[s/"failed_write_requests" : 0/"failed_write_requests" : $body.indices.0.shards.0.failed_write_requests/] -// TESTRESPONSE[s/"operations_written" : 832/"operations_written" : $body.indices.0.shards.0.operations_written/] -// TESTRESPONSE[s/"time_since_last_read_millis" : 8/"time_since_last_read_millis" : $body.indices.0.shards.0.time_since_last_read_millis/] diff --git a/docs/reference/ccr/apis/follow/post-forget-follower.asciidoc b/docs/reference/ccr/apis/follow/post-forget-follower.asciidoc deleted file mode 100644 index 8a7887072f6a2..0000000000000 --- a/docs/reference/ccr/apis/follow/post-forget-follower.asciidoc +++ /dev/null @@ -1,166 +0,0 @@ -[role="xpack"] -[[ccr-post-forget-follower]] -=== Forget follower API -++++ -Forget follower -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Removes the <> follower retention leases from the leader. - -[[ccr-post-forget-follower-request]] -==== {api-request-title} - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /follower_index/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster" : "remote_cluster", - "leader_index" : "leader_index" -} --------------------------------------------------- -// TESTSETUP -// TEST[setup:remote_cluster_and_leader_index] - -[source,console] --------------------------------------------------- -POST /follower_index/_ccr/pause_follow --------------------------------------------------- -// TEARDOWN - -////////////////////////// - -[source,console] --------------------------------------------------- -POST //_ccr/forget_follower -{ - "follower_cluster" : "", - "follower_index" : "", - "follower_index_uuid" : "", - "leader_remote_cluster" : "" -} --------------------------------------------------- -// TEST[s//leader_index/] -// TEST[s//follower_cluster/] -// TEST[s//follower_index/] -// TEST[s//follower_index_uuid/] -// TEST[s//leader_remote_cluster/] -// TEST[skip_shard_failures] - -[source,console-result] --------------------------------------------------- -{ - "_shards" : { - "total" : 1, - "successful" : 1, - "failed" : 0, - "failures" : [ ] - } -} --------------------------------------------------- -// TESTRESPONSE[s/"total" : 1/"total" : $body._shards.total/] -// TESTRESPONSE[s/"successful" : 1/"successful" : $body._shards.successful/] -// TESTRESPONSE[s/"failed" : 0/"failed" : $body._shards.failed/] -// TESTRESPONSE[s/"failures" : \[ \]/"failures" : $body._shards.failures/] - -[[ccr-post-forget-follower-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `manage_leader_index` -index privileges for the leader index. For more information, see -<>. 
- -[[ccr-post-forget-follower-desc]] -==== {api-description-title} - -A following index takes out retention leases on its leader index. These -retention leases are used to increase the likelihood that the shards of the -leader index retain the history of operations that the shards of the following -index need to execute replication. When a follower index is converted to a -regular index via the <> (either via explicit -execution of this API, or implicitly via {ilm}), these retention leases are -removed. However, removing these retention leases can fail (e.g., if the remote -cluster containing the leader index is unavailable). While these retention -leases will eventually expire on their own, their extended existence can cause -the leader index to hold more history than necessary, and prevent {ilm} from -performing some operations on the leader index. This API exists to enable -manually removing these retention leases when the unfollow API was unable to do -so. - -NOTE: This API does not stop replication by a following index. If you use this -API targeting a follower index that is still actively following, the following -index will add back retention leases on the leader. The only purpose of this API -is to handle the case of failure to remove the following retention leases after -the <> is invoked. - -[[ccr-post-forget-follower-path-parms]] -==== {api-path-parms-title} - -``:: - (Required, string) The name of the leader index. - -[[ccr-post-forget-follower-query-params]] -==== {api-query-parms-title} - -`timeout`:: -(Optional, time) Controls the amount of time to wait for results. Defaults to unlimited. - -[[ccr-post-forget-follower-request-body]] -==== {api-request-body-title} - -`follower_cluster`:: - (Required, string) The name of the cluster containing the follower index. - -`follower_index`:: - (Required, string) The name of the follower index. - -`follower_index_uuid`:: - (Required, string) The UUID of the follower index. - -`leader_remote_cluster`:: - (Required, string) The alias (from the perspective of the cluster containing - the follower index) of the <> - containing the leader index. - -[[ccr-post-forget-follower-examples]] -==== {api-examples-title} - -This example removes the follower retention leases for `follower_index` from -`leader_index`. 
- -[source,console] --------------------------------------------------- -POST /leader_index/_ccr/forget_follower -{ - "follower_cluster" : "follower_cluster", - "follower_index" : "follower_index", - "follower_index_uuid" : "vYpnaWPRQB6mNspmoCeYyA", - "leader_remote_cluster" : "leader_cluster" -} --------------------------------------------------- -// TEST[skip_shard_failures] - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "_shards" : { - "total" : 1, - "successful" : 1, - "failed" : 0, - "failures" : [ ] - } -} --------------------------------------------------- -// TESTRESPONSE[s/"total" : 1/"total" : $body._shards.total/] -// TESTRESPONSE[s/"successful" : 1/"successful" : $body._shards.successful/] -// TESTRESPONSE[s/"failed" : 0/"failed" : $body._shards.failed/] -// TESTRESPONSE[s/"failures" : \[ \]/"failures" : $body._shards.failures/] diff --git a/docs/reference/ccr/apis/follow/post-pause-follow.asciidoc b/docs/reference/ccr/apis/follow/post-pause-follow.asciidoc deleted file mode 100644 index c49e9a49b56c9..0000000000000 --- a/docs/reference/ccr/apis/follow/post-pause-follow.asciidoc +++ /dev/null @@ -1,85 +0,0 @@ -[role="xpack"] -[[ccr-post-pause-follow]] -=== Pause follower API -++++ -Pause follower -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Pauses a <> follower index. - -[[ccr-post-pause-follow-request]] -==== {api-request-title} - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /follower_index/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster" : "remote_cluster", - "leader_index" : "leader_index" -} --------------------------------------------------- -// TESTSETUP -// TEST[setup:remote_cluster_and_leader_index] - -////////////////////////// - -[source,console] --------------------------------------------------- -POST //_ccr/pause_follow --------------------------------------------------- -// TEST[s//follower_index/] - -[[ccr-post-pause-follow-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `manage_ccr` cluster -privileges on the cluster that contains the follower index. For more information, -see <>. - -[[ccr-post-pause-follow-desc]] -==== {api-description-title} - -This API pauses a follower index. When this API returns, the follower index will -not fetch any additional operations from the leader index. You can resume -following with the <>. Pausing and -resuming a follower index can be used to change the configuration of the -following task. - -[[ccr-post-pause-follow-path-parms]] -==== {api-path-parms-title} - -``:: - (Required, string) The name of the follower index. 
- -[[ccr-post-pause-follow-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[ccr-post-pause-follow-examples]] -==== {api-examples-title} - -This example pauses a follower index named `follower_index`: - -[source,console] --------------------------------------------------- -POST /follower_index/_ccr/pause_follow --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged" : true -} --------------------------------------------------- diff --git a/docs/reference/ccr/apis/follow/post-resume-follow.asciidoc b/docs/reference/ccr/apis/follow/post-resume-follow.asciidoc deleted file mode 100644 index f6da0110d5c24..0000000000000 --- a/docs/reference/ccr/apis/follow/post-resume-follow.asciidoc +++ /dev/null @@ -1,113 +0,0 @@ -[role="xpack"] -[[ccr-post-resume-follow]] -=== Resume follower API -++++ -Resume follower -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Resumes a <> follower index. - -[[ccr-post-resume-follow-request]] -==== {api-request-title} - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /follower_index/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster" : "remote_cluster", - "leader_index" : "leader_index" -} - -POST /follower_index/_ccr/pause_follow --------------------------------------------------- -// TESTSETUP -// TEST[setup:remote_cluster_and_leader_index] - -[source,console] --------------------------------------------------- -POST /follower_index/_ccr/pause_follow --------------------------------------------------- -// TEARDOWN - -////////////////////////// - -[source,console] --------------------------------------------------- -POST //_ccr/resume_follow -{ -} --------------------------------------------------- -// TEST[s//follower_index/] -// TEST[s//remote_cluster/] -// TEST[s//leader_index/] - -[[ccr-post-resume-follow-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `write` and `monitor` -index privileges for the follower index. You must have `read` and `monitor` -index privileges for the leader index. You must also have `manage_ccr` cluster -privileges on the cluster that contains the follower index. For more information, -see <>. - -[[ccr-post-resume-follow-desc]] -==== {api-description-title} - -This API resumes a follower index that has been paused either explicitly with -the <> or implicitly due to -execution that can not be retried due to failure during following. When this API -returns, the follower index will resume fetching operations from the leader index. - -[[ccr-post-resume-follow-path-parms]] -==== {api-path-parms-title} - -``:: - (Required, string) The name of the follower index. 
- -[[ccr-post-resume-follow-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[ccr-post-resume-follow-request-body]] -==== {api-request-body-title} -include::../follow-request-body.asciidoc[tag=ccr-resume-follow-request-body] - -[[ccr-post-resume-follow-examples]] -==== {api-examples-title} - -This example resumes a follower index named `follower_index`: - -[source,console] --------------------------------------------------- -POST /follower_index/_ccr/resume_follow -{ - "max_read_request_operation_count" : 1024, - "max_outstanding_read_requests" : 16, - "max_read_request_size" : "1024k", - "max_write_request_operation_count" : 32768, - "max_write_request_size" : "16k", - "max_outstanding_write_requests" : 8, - "max_write_buffer_count" : 512, - "max_write_buffer_size" : "512k", - "max_retry_delay" : "10s", - "read_poll_timeout" : "30s" -} --------------------------------------------------- - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged" : true -} --------------------------------------------------- diff --git a/docs/reference/ccr/apis/follow/post-unfollow.asciidoc b/docs/reference/ccr/apis/follow/post-unfollow.asciidoc deleted file mode 100644 index 56b3195e8a134..0000000000000 --- a/docs/reference/ccr/apis/follow/post-unfollow.asciidoc +++ /dev/null @@ -1,92 +0,0 @@ -[role="xpack"] -[[ccr-post-unfollow]] -=== Unfollow API -++++ -Unfollow -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Converts a <> follower index to a regular index. - -[[ccr-post-unfollow-request]] -==== {api-request-title} - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /follower_index/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster" : "remote_cluster", - "leader_index" : "leader_index" -} - -POST /follower_index/_ccr/pause_follow - -POST /follower_index/_close --------------------------------------------------- -// TESTSETUP -// TEST[setup:remote_cluster_and_leader_index] - -////////////////////////// - -[source,console] --------------------------------------------------- -POST //_ccr/unfollow --------------------------------------------------- -// TEST[s//follower_index/] - -[[ccr-post-unfollow-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `manage_follow_index` -index privileges for the follower index. For more information, see -<>. - -[[ccr-post-unfollow-desc]] -==== {api-description-title} - -This API stops the following task associated with a follower index and removes -index metadata and settings associated with {ccr}. This enables the index to -treated as a regular index. The follower index must be paused and closed before -invoking the unfollow API. - -NOTE: Currently {ccr} does not support converting an existing regular index to a -follower index. Converting a follower index to a regular index is an -irreversible operation. - -[[ccr-post-unfollow-path-parms]] -==== {api-path-parms-title} - -``:: - (Required, string) The name of the follower index. 
- -[[ccr-post-unfollow-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[ccr-post-unfollow-examples]] -==== {api-examples-title} - -This example converts `follower_index` from a follower index to a regular index: - -[source,console] --------------------------------------------------- -POST /follower_index/_ccr/unfollow --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged" : true -} --------------------------------------------------- diff --git a/docs/reference/ccr/apis/follow/put-follow.asciidoc b/docs/reference/ccr/apis/follow/put-follow.asciidoc deleted file mode 100644 index d9538fc436563..0000000000000 --- a/docs/reference/ccr/apis/follow/put-follow.asciidoc +++ /dev/null @@ -1,145 +0,0 @@ -[role="xpack"] -[[ccr-put-follow]] -=== Create follower API -++++ -Create follower -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Creates a <> follower index. - -[[ccr-put-follow-request]] -==== {api-request-title} - -////////////////////////// - -[source,console] --------------------------------------------------- -POST /follower_index/_ccr/pause_follow --------------------------------------------------- -// TEARDOWN - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT //_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster" : "", - "leader_index" : "" -} --------------------------------------------------- -// TEST[setup:remote_cluster_and_leader_index] -// TEST[s//follower_index/] -// TEST[s//remote_cluster/] -// TEST[s//leader_index/] - -[[ccr-put-follow-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `write`, `monitor`, -and `manage_follow_index` index privileges for the follower index. You must have -`read` and `monitor` index privileges for the leader index. You must also have -`manage_ccr` cluster privileges on the cluster that contains the follower index. -For more information, see <>. - -[[ccr-put-follow-desc]] -==== {api-description-title} - -This API creates a new follower index that is configured to follow the -referenced leader index. When this API returns, the follower index exists, and -{ccr} starts replicating operations from the leader index to the follower index. - -[[ccr-put-follow-path-parms]] -==== {api-path-parms-title} - -``:: - (Required, string) The name of the follower index. - -[[ccr-put-follow-query-params]] -==== {api-query-parms-title} - -`wait_for_active_shards`:: - (Optional, integer) Specifies the number of shards to wait on being active before - responding. This defaults to waiting on none of the shards to be active. A - shard must be restored from the leader index before being active. Restoring a - follower shard requires transferring all the remote Lucene segment files to - the follower index. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[ccr-put-follow-request-body]] -==== {api-request-body-title} - -[[ccr-put-follow-request-body-leader_index]]`leader_index`:: - (Required, string) The name of the index in the leader cluster to follow. - -[[ccr-put-follow-request-body-remote_cluster]]`remote_cluster`:: - (Required, string) The <> containing - the leader index. 
- -[[ccr-put-follow-request-body-data_stream_name]]`data_stream_name`:: - (Optional, string) If the leader index is part of a <>, the name to - which the local data stream for the followed index should be renamed. For example, A request like: - -[source,console] --------------------------------------------------- -PUT /.ds-logs-mysql-default_copy-2022-01-01-000001/_ccr/follow -{ - "remote_cluster" : "remote_cluster", - "leader_index" : ".ds-logs-mysql-default-2022-01-01-000001", - "data_stream_name": "logs-mysql-default_copy" -} --------------------------------------------------- -// TEST[skip:no setup] - -Replicates the leader index `.ds-logs-mysql-default-2022-01-01-000001` into the follower index -`.ds-logs-mysql-default_copy-2022-01-01-000001` and will do so using the data stream -`logs-mysql-default_copy`, as opposed to the original leader data stream name of -`logs-mysql-default`. - -include::../follow-request-body.asciidoc[] - -[[ccr-put-follow-examples]] -==== {api-examples-title} - -This example creates a follower index named `follower_index`: - -[source,console] --------------------------------------------------- -PUT /follower_index/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster" : "remote_cluster", - "leader_index" : "leader_index", - "settings": { - "index.number_of_replicas": 0 - }, - "max_read_request_operation_count" : 1024, - "max_outstanding_read_requests" : 16, - "max_read_request_size" : "1024k", - "max_write_request_operation_count" : 32768, - "max_write_request_size" : "16k", - "max_outstanding_write_requests" : 8, - "max_write_buffer_count" : 512, - "max_write_buffer_size" : "512k", - "max_retry_delay" : "10s", - "read_poll_timeout" : "30s" -} --------------------------------------------------- -// TEST[setup:remote_cluster_and_leader_index] - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "follow_index_created" : true, - "follow_index_shards_acked" : true, - "index_following_started" : true -} --------------------------------------------------- diff --git a/docs/reference/ccr/apis/get-ccr-stats.asciidoc b/docs/reference/ccr/apis/get-ccr-stats.asciidoc deleted file mode 100644 index e92ad17e10437..0000000000000 --- a/docs/reference/ccr/apis/get-ccr-stats.asciidoc +++ /dev/null @@ -1,195 +0,0 @@ -[role="xpack"] -[[ccr-get-stats]] -=== Get {ccr} stats API -[subs="attributes"] -++++ -Get {ccr-init} stats -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-ccr[Cross-cluster replication APIs]. --- - -Get <> stats. - -[[ccr-get-stats-request]] -==== {api-request-title} - -////////////////////////// - -[source,console] --------------------------------------------------- -PUT /follower_index/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster" : "remote_cluster", - "leader_index" : "leader_index" -} --------------------------------------------------- -// TESTSETUP -// TEST[setup:remote_cluster_and_leader_index] - -[source,console] --------------------------------------------------- -POST /follower_index/_ccr/pause_follow --------------------------------------------------- -// TEARDOWN - -////////////////////////// - -[source,console] --------------------------------------------------- -GET /_ccr/stats --------------------------------------------------- - -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have `monitor` cluster -privileges on the cluster that contains the follower index. 
For more information, -see <>. - -[[ccr-get-stats-desc]] -==== {api-description-title} - -This API gets {ccr} stats. This API will return all stats related to {ccr}. In -particular, this API returns stats about auto-following, and returns the same -shard-level stats as in the <>. - -[[ccr-get-stats-query-params]] -==== {api-query-parms-title} - -`timeout`:: -(Optional, time) Controls the amount of time to wait for results. Defaults to unlimited. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[role="child_attributes"] -[[ccr-get-stats-response-body]] -==== {api-response-body-title} - -//Begin auto_follow_stats -`auto_follow_stats`:: -(object) An object representing stats for the auto-follow coordinator. -+ -.Properties of `auto_follow_stats` -[%collapsible%open] -==== -`number_of_failed_follow_indices`:: -(long) The number of indices that the auto-follow coordinator failed to -automatically follow. The causes of recent failures are captured in the logs -of the elected master node and in the -`auto_follow_stats.recent_auto_follow_errors` field. - -`number_of_failed_remote_cluster_state_requests`:: -(long) The number of times that the auto-follow coordinator failed to retrieve -the cluster state from a remote cluster registered in a collection of -auto-follow patterns. - -`number_of_successful_follow_indices`:: -(long) The number of indices that the auto-follow coordinator successfully -followed. - -`recent_auto_follow_errors`:: -(array) An array of objects representing failures by the auto-follow coordinator. -==== -//End auto_follow_stats - -`follow_stats`:: -(object) An object representing shard-level stats for follower indices; refer to -the details of the response in the -<>. - -[[ccr-get-stats-examples]] -==== {api-examples-title} - -This example retrieves {ccr} stats: - -[source,console] --------------------------------------------------- -GET /_ccr/stats --------------------------------------------------- - -The API returns the following results: - -[source,console-result] --------------------------------------------------- -{ - "auto_follow_stats" : { - "number_of_failed_follow_indices" : 0, - "number_of_failed_remote_cluster_state_requests" : 0, - "number_of_successful_follow_indices" : 1, - "recent_auto_follow_errors" : [], - "auto_followed_clusters" : [] - }, - "follow_stats" : { - "indices" : [ - { - "index" : "follower_index", - "total_global_checkpoint_lag" : 256, - "shards" : [ - { - "remote_cluster" : "remote_cluster", - "leader_index" : "leader_index", - "follower_index" : "follower_index", - "shard_id" : 0, - "leader_global_checkpoint" : 1024, - "leader_max_seq_no" : 1536, - "follower_global_checkpoint" : 768, - "follower_max_seq_no" : 896, - "last_requested_seq_no" : 897, - "outstanding_read_requests" : 8, - "outstanding_write_requests" : 2, - "write_buffer_operation_count" : 64, - "follower_mapping_version" : 4, - "follower_settings_version" : 2, - "follower_aliases_version" : 8, - "total_read_time_millis" : 32768, - "total_read_remote_exec_time_millis" : 16384, - "successful_read_requests" : 32, - "failed_read_requests" : 0, - "operations_read" : 896, - "bytes_read" : 32768, - "total_write_time_millis" : 16384, - "write_buffer_size_in_bytes" : 1536, - "successful_write_requests" : 16, - "failed_write_requests" : 0, - "operations_written" : 832, - "read_exceptions" : [ ], - "time_since_last_read_millis" : 8 - } - ] - } - ] - } -} --------------------------------------------------- -// TESTRESPONSE[s/"total_global_checkpoint_lag" : 
256/"total_global_checkpoint_lag" : 0/] -// TESTRESPONSE[s/"number_of_failed_follow_indices" : 0/"number_of_failed_follow_indices" : $body.auto_follow_stats.number_of_failed_follow_indices/] -// TESTRESPONSE[s/"number_of_failed_remote_cluster_state_requests" : 0/"number_of_failed_remote_cluster_state_requests" : $body.auto_follow_stats.number_of_failed_remote_cluster_state_requests/] -// TESTRESPONSE[s/"number_of_successful_follow_indices" : 1/"number_of_successful_follow_indices" : $body.auto_follow_stats.number_of_successful_follow_indices/] -// TESTRESPONSE[s/"recent_auto_follow_errors" : \[\]/"recent_auto_follow_errors" : $body.auto_follow_stats.recent_auto_follow_errors/] -// TESTRESPONSE[s/"auto_followed_clusters" : \[\]/"auto_followed_clusters" : $body.auto_follow_stats.auto_followed_clusters/] -// TESTRESPONSE[s/"leader_global_checkpoint" : 1024/"leader_global_checkpoint" : $body.follow_stats.indices.0.shards.0.leader_global_checkpoint/] -// TESTRESPONSE[s/"leader_max_seq_no" : 1536/"leader_max_seq_no" : $body.follow_stats.indices.0.shards.0.leader_max_seq_no/] -// TESTRESPONSE[s/"follower_global_checkpoint" : 768/"follower_global_checkpoint" : $body.follow_stats.indices.0.shards.0.follower_global_checkpoint/] -// TESTRESPONSE[s/"follower_max_seq_no" : 896/"follower_max_seq_no" : $body.follow_stats.indices.0.shards.0.follower_max_seq_no/] -// TESTRESPONSE[s/"last_requested_seq_no" : 897/"last_requested_seq_no" : $body.follow_stats.indices.0.shards.0.last_requested_seq_no/] -// TESTRESPONSE[s/"outstanding_read_requests" : 8/"outstanding_read_requests" : $body.follow_stats.indices.0.shards.0.outstanding_read_requests/] -// TESTRESPONSE[s/"outstanding_write_requests" : 2/"outstanding_write_requests" : $body.follow_stats.indices.0.shards.0.outstanding_write_requests/] -// TESTRESPONSE[s/"write_buffer_operation_count" : 64/"write_buffer_operation_count" : $body.follow_stats.indices.0.shards.0.write_buffer_operation_count/] -// TESTRESPONSE[s/"follower_mapping_version" : 4/"follower_mapping_version" : $body.follow_stats.indices.0.shards.0.follower_mapping_version/] -// TESTRESPONSE[s/"follower_settings_version" : 2/"follower_settings_version" : $body.follow_stats.indices.0.shards.0.follower_settings_version/] -// TESTRESPONSE[s/"follower_aliases_version" : 8/"follower_aliases_version" : $body.follow_stats.indices.0.shards.0.follower_aliases_version/] -// TESTRESPONSE[s/"total_read_time_millis" : 32768/"total_read_time_millis" : $body.follow_stats.indices.0.shards.0.total_read_time_millis/] -// TESTRESPONSE[s/"total_read_remote_exec_time_millis" : 16384/"total_read_remote_exec_time_millis" : $body.follow_stats.indices.0.shards.0.total_read_remote_exec_time_millis/] -// TESTRESPONSE[s/"successful_read_requests" : 32/"successful_read_requests" : $body.follow_stats.indices.0.shards.0.successful_read_requests/] -// TESTRESPONSE[s/"failed_read_requests" : 0/"failed_read_requests" : $body.follow_stats.indices.0.shards.0.failed_read_requests/] -// TESTRESPONSE[s/"operations_read" : 896/"operations_read" : $body.follow_stats.indices.0.shards.0.operations_read/] -// TESTRESPONSE[s/"bytes_read" : 32768/"bytes_read" : $body.follow_stats.indices.0.shards.0.bytes_read/] -// TESTRESPONSE[s/"total_write_time_millis" : 16384/"total_write_time_millis" : $body.follow_stats.indices.0.shards.0.total_write_time_millis/] -// TESTRESPONSE[s/"write_buffer_size_in_bytes" : 1536/"write_buffer_size_in_bytes" : $body.follow_stats.indices.0.shards.0.write_buffer_size_in_bytes/] -// 
TESTRESPONSE[s/"successful_write_requests" : 16/"successful_write_requests" : $body.follow_stats.indices.0.shards.0.successful_write_requests/] -// TESTRESPONSE[s/"failed_write_requests" : 0/"failed_write_requests" : $body.follow_stats.indices.0.shards.0.failed_write_requests/] -// TESTRESPONSE[s/"operations_written" : 832/"operations_written" : $body.follow_stats.indices.0.shards.0.operations_written/] -// TESTRESPONSE[s/"time_since_last_read_millis" : 8/"time_since_last_read_millis" : $body.follow_stats.indices.0.shards.0.time_since_last_read_millis/] diff --git a/docs/reference/ccr/auto-follow.asciidoc b/docs/reference/ccr/auto-follow.asciidoc deleted file mode 100644 index 0b8ade90aae2a..0000000000000 --- a/docs/reference/ccr/auto-follow.asciidoc +++ /dev/null @@ -1,82 +0,0 @@ -[role="xpack"] -[[ccr-auto-follow]] -=== Manage auto-follow patterns -To replicate time series indices, you configure an auto-follow pattern so that -each new index in the series is replicated automatically. Whenever the name of -a new index on the remote cluster matches the auto-follow pattern, a -corresponding follower index is added to the local cluster. - -NOTE: Auto-follow patterns only match open indices on the remote cluster that -have all primary shards started. Auto-follow patterns do not match indices that -can't be used for {ccr-init} such as <> or -<>. Avoid using an auto-follow pattern -that matches indices with a <>. These -blocks prevent follower indices from replicating such indices. - -You can also create auto-follow patterns for data streams. When a new backing -index is generated on a remote cluster, that index and its data stream are -automatically followed if the data stream name matches an auto-follow -pattern. If you create a data stream after creating the auto-follow pattern, -all backing indices are followed automatically. - -The data streams replicated from a remote cluster by CCR are protected from -local rollovers. The <> -can be used to turn these data streams into regular data streams. - -Auto-follow patterns are especially useful with -<>, which might continually create -new indices on the cluster containing the leader index. - -[[ccr-access-ccr-auto-follow]] -To start using {ccr} auto-follow patterns from Stack Management in {kib}, select -*Cross-Cluster Replication* from the side navigation and choose the -*Auto-follow patterns* tab. - -[[ccr-auto-follow-create]] -==== Create auto-follow patterns -When you <>, -you are configuring a collection of patterns against a single remote cluster. -When an index is created in the remote cluster with a name that matches one of -the patterns in the collection, a follower index is configured in the local -cluster. The follower index uses the new index as its leader index. - -Use the <> to add a -new auto-follow pattern configuration. - -[[ccr-auto-follow-retrieve]] -==== Retrieve auto-follow patterns -To view existing auto-follow patterns and make changes to the backing -patterns, <> on your _remote_ cluster. - -Select the auto-follow pattern that you want to view details about. From there, -you can make changes to the auto-follow pattern. You can also view your -follower indices included in the auto-follow pattern. - -Use the <> to inspect -all configured auto-follow pattern collections. - -[[ccr-auto-follow-pause]] -==== Pause and resume auto-follow patterns -To pause and resume replication of auto-follow pattern collections, -<>, select the auto-follow pattern, -and pause replication. 
- -To resume replication, select the pattern and choose -*Manage pattern > Resume replication*. - -Use the <> to -pause auto-follow patterns. -Use the <> to -resume auto-follow patterns. - -[[ccr-auto-follow-delete]] -==== Delete auto-follow patterns -To delete an auto-follow pattern collection, -<>, select the auto-follow pattern, -and pause replication. - -When the pattern status changes to Paused, choose -*Manage pattern > Delete pattern*. - -Use the <> to -delete a configured auto-follow pattern collection. diff --git a/docs/reference/ccr/bi-directional-disaster-recovery.asciidoc b/docs/reference/ccr/bi-directional-disaster-recovery.asciidoc deleted file mode 100644 index b491e90053031..0000000000000 --- a/docs/reference/ccr/bi-directional-disaster-recovery.asciidoc +++ /dev/null @@ -1,275 +0,0 @@ -[role="xpack"] -[[ccr-disaster-recovery-bi-directional-tutorial]] -=== Tutorial: Disaster recovery based on bi-directional {ccr} -++++ -Bi-directional disaster recovery -++++ - -//// -[source,console] ----- -PUT _data_stream/logs-generic-default ----- -// TESTSETUP - -[source,console] ----- -DELETE /_data_stream/* ----- -// TEARDOWN -//// - -Learn how to set up disaster recovery between two clusters based on -bi-directional {ccr}. The following tutorial is designed for data streams which support -<> and <>. You can only perform these actions on the leader index. - -This tutorial works with {ls} as the source of ingestion. It takes advantage of a {ls} feature where {logstash-ref}/plugins-outputs-elasticsearch.html[the {ls} output to {es}] can be load balanced across an array of hosts specified. {beats} and {agents} currently do not -support multiple outputs. It should also be possible to set up a proxy -(load balancer) to redirect traffic without {ls} in this tutorial. - -* Setting up a remote cluster on `clusterA` and `clusterB`. -* Setting up bi-directional cross-cluster replication with exclusion patterns. -* Setting up {ls} with multiple hosts to allow automatic load balancing and switching during disasters. - -image::images/ccr-bi-directional-disaster-recovery.png[Bi-directional cross cluster replication failover and failback] - -[[ccr-tutorial-initial-setup]] -==== Initial setup -. Set up a remote cluster on both clusters. -+ -[source,console] ----- -### On cluster A ### -PUT _cluster/settings -{ - "persistent": { - "cluster": { - "remote": { - "clusterB": { - "mode": "proxy", - "skip_unavailable": true, - "server_name": "clusterb.es.region-b.gcp.elastic-cloud.com", - "proxy_socket_connections": 18, - "proxy_address": "clusterb.es.region-b.gcp.elastic-cloud.com:9400" - } - } - } - } -} -### On cluster B ### -PUT _cluster/settings -{ - "persistent": { - "cluster": { - "remote": { - "clusterA": { - "mode": "proxy", - "skip_unavailable": true, - "server_name": "clustera.es.region-a.gcp.elastic-cloud.com", - "proxy_socket_connections": 18, - "proxy_address": "clustera.es.region-a.gcp.elastic-cloud.com:9400" - } - } - } - } -} ----- -// TEST[setup:host] -// TEST[s/"server_name": "clustera.es.region-a.gcp.elastic-cloud.com",//] -// TEST[s/"server_name": "clusterb.es.region-b.gcp.elastic-cloud.com",//] -// TEST[s/"proxy_socket_connections": 18,//] -// TEST[s/clustera.es.region-a.gcp.elastic-cloud.com:9400/\${transport_host}/] -// TEST[s/clusterb.es.region-b.gcp.elastic-cloud.com:9400/\${transport_host}/] - -. Set up bi-directional cross-cluster replication. 
-+ -[source,console] ----- -### On cluster A ### -PUT /_ccr/auto_follow/logs-generic-default -{ - "remote_cluster": "clusterB", - "leader_index_patterns": [ - ".ds-logs-generic-default-20*" - ], - "leader_index_exclusion_patterns":"*-replicated_from_clustera", - "follow_index_pattern": "{{leader_index}}-replicated_from_clusterb" -} - -### On cluster B ### -PUT /_ccr/auto_follow/logs-generic-default -{ - "remote_cluster": "clusterA", - "leader_index_patterns": [ - ".ds-logs-generic-default-20*" - ], - "leader_index_exclusion_patterns":"*-replicated_from_clusterb", - "follow_index_pattern": "{{leader_index}}-replicated_from_clustera" -} ----- -// TEST[setup:remote_cluster] -// TEST[s/clusterA/remote_cluster/] -// TEST[s/clusterB/remote_cluster/] -+ -IMPORTANT: Existing data on the cluster will not be replicated by -`_ccr/auto_follow` even though the patterns may match. This function will only -replicate newly created backing indices (as part of the data stream). -+ -IMPORTANT: Use `leader_index_exclusion_patterns` to avoid recursion. -+ -TIP: `follow_index_pattern` allows lowercase characters only. -+ -TIP: This step cannot be executed via the {kib} UI due to the lack of an exclusion -pattern in the UI. Use the API in this step. - -. Set up the {ls} configuration file. -+ -This example uses the input generator to demonstrate the document -count in the clusters. Reconfigure this section -to suit your own use case. -+ -[source,logstash] ----- -### On Logstash server ### -### This is a logstash config file ### -input { - generator{ - message => 'Hello World' - count => 100 - } -} -output { - elasticsearch { - hosts => ["https://clustera.es.region-a.gcp.elastic-cloud.com:9243","https://clusterb.es.region-b.gcp.elastic-cloud.com:9243"] - user => "logstash-user" - password => "same_password_for_both_clusters" - } -} ----- -+ -IMPORTANT: The key point is that when `cluster A` is down, all traffic will be -automatically redirected to `cluster B`. Once `cluster A` comes back, traffic -is automatically redirected back to `cluster A` again. This is achieved by the -option `hosts` where multiple ES cluster endpoints are specified in the -array `[clusterA, clusterB]`. -+ -TIP: Set up the same password for the same user on both clusters to use this load-balancing feature. - -. Start {ls} with the earlier configuration file. -+ -[source,sh] ----- -### On Logstash server ### -bin/logstash -f multiple_hosts.conf ----- - -. Observe document counts in data streams. -+ -The setup creates a data stream named `logs-generic-default` on each of the clusters. {ls} will write 50% of the documents to `cluster A` and 50% of the documents to `cluster B` when both clusters are up. -+ -Bi-directional {ccr} will create one more data stream on each of the clusters -with the `-replication_from_cluster{a|b}` suffix. At the end of this step: -+ -* data streams on cluster A contain: -** 50 documents in `logs-generic-default-replicated_from_clusterb` -** 50 documents in `logs-generic-default` -* data streams on cluster B contain: -** 50 documents in `logs-generic-default-replicated_from_clustera` -** 50 documents in `logs-generic-default` - -. Queries should be set up to search across both data streams. -A query on `logs*`, on either of the clusters, returns 100 -hits in total. -+ -[source,console] ----- -GET logs*/_search?size=0 ----- - - -==== Failover when `clusterA` is down -. You can simulate this by shutting down either of the clusters. Let's shut down -`cluster A` in this tutorial. -. 
Start {ls} with the same configuration file. (This step is not required in real -use cases where {ls} ingests continuously.) -+ -[source,sh] ----- -### On Logstash server ### -bin/logstash -f multiple_hosts.conf ----- - -. Observe all {ls} traffic will be redirected to `cluster B` automatically. -+ -TIP: You should also redirect all search traffic to the `clusterB` cluster during this time. - -. The two data streams on `cluster B` now contain a different number of documents. -+ -* data streams on cluster A (down) -** 50 documents in `logs-generic-default-replicated_from_clusterb` -** 50 documents in `logs-generic-default` -* data streams On cluster B (up) -** 50 documents in `logs-generic-default-replicated_from_clustera` -** 150 documents in `logs-generic-default` - - -==== Failback when `clusterA` comes back -. You can simulate this by turning `cluster A` back on. -. Data ingested to `cluster B` during `cluster A` 's downtime will be -automatically replicated. -+ -* data streams on cluster A -** 150 documents in `logs-generic-default-replicated_from_clusterb` -** 50 documents in `logs-generic-default` -* data streams on cluster B -** 50 documents in `logs-generic-default-replicated_from_clustera` -** 150 documents in `logs-generic-default` - -. If you have {ls} running at this time, you will also observe traffic is -sent to both clusters. - -==== Perform update or delete by query -It is possible to update or delete the documents but you can only perform these actions on the leader index. - -. First identify which backing index contains the document you want to update. -+ -[source,console] ----- -### On either of the cluster ### -GET logs-generic-default*/_search?filter_path=hits.hits._index -{ -"query": { - "match": { - "event.sequence": "97" - } - } -} ----- -+ -* If the hits returns `"_index": ".ds-logs-generic-default-replicated_from_clustera--*"`, then you need to proceed to the next step on `cluster A`. -* If the hits returns `"_index": ".ds-logs-generic-default-replicated_from_clusterb--*"`, then you need to proceed to the next step on `cluster B`. -* If the hits returns `"_index": ".ds-logs-generic-default--*"`, then you need to proceed to the next step on the same cluster where you performed the search query. - -. Perform the update (or delete) by query: -+ -[source,console] ----- -### On the cluster identified from the previous step ### -POST logs-generic-default/_update_by_query -{ - "query": { - "match": { - "event.sequence": "97" - } - }, - "script": { - "source": "ctx._source.event.original = params.new_event", - "lang": "painless", - "params": { - "new_event": "FOOBAR" - } - } -} ----- -+ -TIP: If a soft delete is merged away before it can be replicated to a follower the following process will fail due to incomplete history on the leader, see <> for more details. 
diff --git a/docs/reference/ccr/getting-started.asciidoc b/docs/reference/ccr/getting-started.asciidoc deleted file mode 100644 index 2a0e3bcc5681f..0000000000000 --- a/docs/reference/ccr/getting-started.asciidoc +++ /dev/null @@ -1,326 +0,0 @@ -[role="xpack"] -[[ccr-getting-started-tutorial]] -=== Tutorial: Set up {ccr} -++++ -Set up {ccr} -++++ - -//// -[source,console] ----- -PUT /server-metrics -{ - "settings" : { - "index" : { - "number_of_shards" : 1, - "number_of_replicas" : 0 - } - }, - "mappings" : { - "properties" : { - "@timestamp" : { - "type" : "date" - }, - "accept" : { - "type" : "long" - }, - "deny" : { - "type" : "long" - }, - "host" : { - "type" : "keyword" - }, - "response" : { - "type" : "float" - }, - "service" : { - "type" : "keyword" - }, - "total" : { - "type" : "long" - } - } - } -} ----- -// TESTSETUP -//// - -Use this guide to set up {ccr} (CCR) between clusters in two -datacenters. Replicating your data across datacenters provides several benefits: - -* Brings data closer to your users or application server to reduce latency and -response time -* Provides your mission-critical applications with the tolerance to withstand datacenter or region outages - -In this guide, you'll learn how to: - -* Configure a <> with a leader index -* Create a follower index on a local cluster -* Create an auto-follow pattern to automatically follow time series indices -that are periodically created in a remote cluster - -You can manually create follower indices to replicate specific indices on a -remote cluster, or configure auto-follow patterns to replicate rolling time series indices. - -TIP: If you want to replicate data across clusters in the cloud, you can -link:{cloud}/ec-enable-ccs.html[configure remote clusters on {ess}]. Then, you -can <> and set up {ccr}. - -[[ccr-getting-started-prerequisites]] -==== Prerequisites -To complete this tutorial, you need: - -* The `manage` cluster privilege on the local cluster. -* A license on both clusters that includes {ccr}. {kibana-ref}/managing-licenses.html[Activate a free 30-day trial]. -* An index on the remote cluster that contains the data you want to replicate. -This tutorial uses the sample eCommerce orders data set. -{kibana-ref}/get-started.html#gs-get-data-into-kibana[Load sample data]. -* In the local cluster, all nodes with the `master` <> must -also have the <> role. The local cluster -must also have at least one node with both a data role and the -<> role. Individual tasks for coordinating -replication scale based on the number of data nodes with the -`remote_cluster_client` role in the local cluster. - -==== Connect to a remote cluster -To replicate an index on a remote cluster (Cluster A) to a local cluster (Cluster B), you configure Cluster A as a remote on Cluster B. - -image::images/ccr-tutorial-clusters.png[ClusterA contains the leader index and ClusterB contains the follower index] - -To configure a remote cluster from Stack Management in {kib}: - -. Set up a <> as needed. -. Select *Remote Clusters* from the side navigation. -. Specify the {es} endpoint URL, or the IP address or host name of the remote -cluster (`ClusterA`) followed by the transport port (defaults to `9300`). For -example, `cluster.es.eastus2.staging.azure.foundit.no:9400` or -`192.168.1.1:9300`. 
- -[%collapsible%open] -.API example -==== -You can also use the <> to -add a remote cluster: - -[source,console] ----- -PUT /_cluster/settings -{ - "persistent" : { - "cluster" : { - "remote" : { - "leader" : { - "seeds" : [ - "127.0.0.1:9300" <1> - ] - } - } - } - } -} ----- -// TEST[setup:host] -// TEST[s/127.0.0.1:9300/\${transport_host}/] -<1> Specifies the hostname and transport port of a seed node in the remote - cluster. - -You can verify that the local cluster is successfully connected to the remote -cluster. - -[source,console] ----- -GET /_remote/info ----- -// TEST[continued] - -The API response indicates that the local cluster is connected to the remote -cluster with cluster alias `leader`. - -[source,console-result] ----- -{ - "leader" : { - "seeds" : [ - "127.0.0.1:9300" - ], - "connected" : true, - "num_nodes_connected" : 1, <1> - "max_connections_per_cluster" : 3, - "initial_connect_timeout" : "30s", - "skip_unavailable" : true, - "mode" : "sniff" - } -} ----- -// TESTRESPONSE[s/127.0.0.1:9300/$body.leader.seeds.0/] -// TEST[s/"connected" : true/"connected" : $body.leader.connected/] -// TEST[s/"num_nodes_connected" : 1/"num_nodes_connected" : $body.leader.num_nodes_connected/] -<1> The number of nodes in the remote cluster the local cluster is - connected to. -==== - -include::{es-ref-dir}/security/authentication/remote-clusters-privileges-cert.asciidoc[tag=configure-ccr-privileges] - -[[ccr-getting-started-follower-index]] -==== Create a follower index to replicate a specific index -When you create a follower index, you reference the remote cluster and the -leader index in your remote cluster. - -To create a follower index from Stack Management in {kib}: - -. Select *Cross-Cluster Replication* in the side navigation and choose the -*Follower Indices* tab. -. Choose the cluster (ClusterA) containing the leader index you want to -replicate. -. Enter the name of the leader index, which is -`kibana_sample_data_ecommerce` if you are following the tutorial. -. Enter a name for your follower index, such as `follower-kibana-sample-data`. - -{es} initializes the follower using the -<> -process, which transfers the existing Lucene segment files from the leader -index to the follower index. The index status changes to *Paused*. When the -remote recovery process is complete, the index following begins and the status -changes to *Active*. - -When you index documents into your leader index, {es} replicates the documents -in the follower index. - -[role="screenshot"] -image::images/ccr-follower-index.png["The Cross-Cluster Replication page in {kib}"] - -[%collapsible%open] -.API example -==== -You can also use the <> to create follower -indices. When you create a follower index, you must reference the remote cluster -and the leader index that you created in the remote cluster. - -When initiating the follower request, the response returns before the -<> process completes. To wait for the process -to complete, add the `wait_for_active_shards` parameter to your request. 
- -[source,console] ----- -PUT /server-metrics-follower/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster" : "leader", - "leader_index" : "server-metrics" -} ----- -// TEST[continued] - -////////////////////////// - -[source,console-result] --------------------------------------------------- -{ - "follow_index_created" : true, - "follow_index_shards_acked" : true, - "index_following_started" : true -} --------------------------------------------------- - -////////////////////////// - -Use the -<> to inspect the status of -replication. - -////////////////////////// - -[source,console] --------------------------------------------------- -POST /server-metrics-follower/_ccr/pause_follow - -POST /server-metrics-follower/_close - -POST /server-metrics-follower/_ccr/unfollow --------------------------------------------------- -// TEST[continued] - -////////////////////////// -==== - -[[ccr-getting-started-auto-follow]] -==== Create an auto-follow pattern to replicate time series indices -You use <> to automatically create new -followers for rolling time series indices. Whenever the name of a new index on -the remote cluster matches the auto-follow pattern, a corresponding follower -index is added to the local cluster. Note that only indices created on the -remote cluster after the auto-follow pattern is created will be auto-followed: -existing indices on the remote cluster are ignored even if they match the pattern. - -An auto-follow pattern specifies the remote cluster you want to replicate from, -and one or more index patterns that specify the rolling time series indices you -want to replicate. - -// tag::ccr-create-auto-follow-pattern-tag[] -To create an auto-follow pattern from Stack Management in {kib}: - -. Select *Cross Cluster Replication* in the side navigation and choose the -*Auto-follow patterns* tab. -. Enter a name for the auto-follow pattern, such as `beats`. -. Choose the remote cluster that contains the index you want to replicate, -which in the example scenario is Cluster A. -. Enter one or more index patterns that identify the indices you want to -replicate from the remote cluster. For example, enter -`metricbeat-* packetbeat-*` to automatically create followers for {metricbeat} and {packetbeat} indices. -. Enter *follower-* as the prefix to apply to the names of the follower indices so -you can more easily identify replicated indices. - -As new indices matching these patterns are -created on the remote, {es} automatically replicates them to local follower indices. - -[role="screenshot"] -image::images/auto-follow-patterns.png["The Auto-follow patterns page in {kib}"] - -// end::ccr-create-auto-follow-pattern-tag[] - -[%collapsible%open] -.API example -==== -Use the <> to -configure auto-follow patterns. - -[source,console] ----- -PUT /_ccr/auto_follow/beats -{ - "remote_cluster" : "leader", - "leader_index_patterns" : - [ - "metricbeat-*", <1> - "packetbeat-*" <2> - ], - "follow_index_pattern" : "{{leader_index}}-copy" <3> -} ----- -// TEST[continued] -<1> Automatically follow new {metricbeat} indices. -<2> Automatically follow new {packetbeat} indices. -<3> The name of the follower index is derived from the name of the leader index - by adding the suffix `-copy` to the name of the leader index. 
- -////////////////////////// - -[source,console-result] --------------------------------------------------- -{ - "acknowledged" : true -} --------------------------------------------------- - -////////////////////////// - -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_ccr/auto_follow/beats --------------------------------------------------- -// TEST[continued] - -////////////////////////// -==== diff --git a/docs/reference/ccr/images/auto-follow-patterns.png b/docs/reference/ccr/images/auto-follow-patterns.png deleted file mode 100644 index 69e1cc8641bfd..0000000000000 Binary files a/docs/reference/ccr/images/auto-follow-patterns.png and /dev/null differ diff --git a/docs/reference/ccr/images/ccr-arch-bi-directional.png b/docs/reference/ccr/images/ccr-arch-bi-directional.png deleted file mode 100644 index 9c936cd9f4b42..0000000000000 Binary files a/docs/reference/ccr/images/ccr-arch-bi-directional.png and /dev/null differ diff --git a/docs/reference/ccr/images/ccr-arch-central-reporting.png b/docs/reference/ccr/images/ccr-arch-central-reporting.png deleted file mode 100644 index c89346e290065..0000000000000 Binary files a/docs/reference/ccr/images/ccr-arch-central-reporting.png and /dev/null differ diff --git a/docs/reference/ccr/images/ccr-arch-chain-dcs.png b/docs/reference/ccr/images/ccr-arch-chain-dcs.png deleted file mode 100644 index 042a8185c7589..0000000000000 Binary files a/docs/reference/ccr/images/ccr-arch-chain-dcs.png and /dev/null differ diff --git a/docs/reference/ccr/images/ccr-arch-data-locality.png b/docs/reference/ccr/images/ccr-arch-data-locality.png deleted file mode 100644 index a2b67b07284f4..0000000000000 Binary files a/docs/reference/ccr/images/ccr-arch-data-locality.png and /dev/null differ diff --git a/docs/reference/ccr/images/ccr-arch-disaster-recovery.png b/docs/reference/ccr/images/ccr-arch-disaster-recovery.png deleted file mode 100644 index 244c71910ff55..0000000000000 Binary files a/docs/reference/ccr/images/ccr-arch-disaster-recovery.png and /dev/null differ diff --git a/docs/reference/ccr/images/ccr-arch-multiple-dcs.png b/docs/reference/ccr/images/ccr-arch-multiple-dcs.png deleted file mode 100644 index 2a2fd05a00711..0000000000000 Binary files a/docs/reference/ccr/images/ccr-arch-multiple-dcs.png and /dev/null differ diff --git a/docs/reference/ccr/images/ccr-bi-directional-disaster-recovery.png b/docs/reference/ccr/images/ccr-bi-directional-disaster-recovery.png deleted file mode 100644 index ad597160d3ce0..0000000000000 Binary files a/docs/reference/ccr/images/ccr-bi-directional-disaster-recovery.png and /dev/null differ diff --git a/docs/reference/ccr/images/ccr-follower-index.png b/docs/reference/ccr/images/ccr-follower-index.png deleted file mode 100644 index dee64c5272cc7..0000000000000 Binary files a/docs/reference/ccr/images/ccr-follower-index.png and /dev/null differ diff --git a/docs/reference/ccr/images/ccr-tutorial-clusters.png b/docs/reference/ccr/images/ccr-tutorial-clusters.png deleted file mode 100644 index 23e72702fdfab..0000000000000 Binary files a/docs/reference/ccr/images/ccr-tutorial-clusters.png and /dev/null differ diff --git a/docs/reference/ccr/images/ccr-uni-directional-disaster-recovery.png b/docs/reference/ccr/images/ccr-uni-directional-disaster-recovery.png deleted file mode 100644 index ad6e19fa13812..0000000000000 Binary files a/docs/reference/ccr/images/ccr-uni-directional-disaster-recovery.png and /dev/null differ diff --git 
a/docs/reference/ccr/images/remote-clusters.png b/docs/reference/ccr/images/remote-clusters.png deleted file mode 100644 index 78b819726da41..0000000000000 Binary files a/docs/reference/ccr/images/remote-clusters.png and /dev/null differ diff --git a/docs/reference/ccr/index.asciidoc b/docs/reference/ccr/index.asciidoc deleted file mode 100644 index f3180da1ae77e..0000000000000 --- a/docs/reference/ccr/index.asciidoc +++ /dev/null @@ -1,347 +0,0 @@ -[role="xpack"] -[[xpack-ccr]] -== {ccr-cap} -With {ccr}, you can replicate indices across clusters to: - -* Continue handling search requests in the event of a datacenter outage -* Prevent search volume from impacting indexing throughput -* Reduce search latency by processing search requests in geo-proximity to the -user - -{ccr-cap} uses an active-passive model. You index to a _leader_ index, and the -data is replicated to one or more read-only _follower_ indices. Before you can add a follower index to a cluster, you must configure the _remote cluster_ that contains the leader index. - -When the leader index receives writes, the follower indices pull changes from -the leader index on the remote cluster. You can manually create follower -indices, or configure auto-follow patterns to automatically create follower -indices for new time series indices. - -You configure {ccr} clusters in a uni-directional or bi-directional setup: - -* In a uni-directional configuration, one cluster contains only -leader indices, and the other cluster contains only follower indices. -* In a bi-directional configuration, each cluster contains both leader and -follower indices. - -In a uni-directional configuration, the cluster containing follower indices -must be running **the same or newer** version of {es} as the remote cluster. -If newer, the versions must also be compatible as outlined in the following matrix. - -[%collapsible] -[[ccr-version-compatibility]] -.Version compatibility matrix -==== -include::../modules/remote-clusters-shared.asciidoc[tag=remote-cluster-compatibility-matrix] -==== - -[discrete] -[[ccr-multi-cluster-architectures]] -=== Multi-cluster architectures -Use {ccr} to construct several multi-cluster architectures within the Elastic -Stack: - -* <> in case a primary cluster fails, -with a secondary cluster serving as a hot backup -* <> to maintain multiple copies of the -dataset close to the application servers (and users), and reduce costly latency -* <> for minimizing network -traffic and latency in querying multiple geo-distributed {es} clusters, or for -preventing search load from interfering with indexing by offloading search to a -secondary cluster - -Watch the -https://www.elastic.co/webinars/replicate-elasticsearch-data-with-cross-cluster-replication-ccr[{ccr} webinar] to learn more about the following use cases. -Then, <> on your local machine and work -through the demo from the webinar. - -IMPORTANT: In all of these use cases, you must -<> independently on every -cluster. The security configuration is not replicated when configuring {ccr} for -disaster recovery. To ensure that the {es} `security` feature state is backed up, -<> regularly. You can then restore -the native users, roles, and tokens from your security configuration. -[discrete] -[[ccr-disaster-recovery]] -==== Disaster recovery and high availability -Disaster recovery provides your mission-critical applications with the -tolerance to withstand datacenter or region outages. This use case is the -most common deployment of {ccr}. 
You can configure clusters in different -architectures to support disaster recovery and high availability: - -* <> -* <> -* <> -* <> - -[discrete] -[[ccr-single-datacenter-recovery]] -===== Single disaster recovery datacenter -In this configuration, data is replicated from the production datacenter to the -disaster recovery datacenter. Because the follower indices replicate the leader -index, your application can use the disaster recovery datacenter if the -production datacenter is unavailable. - -image::images/ccr-arch-disaster-recovery.png[Production datacenter that replicates data to a disaster recovery datacenter] - -[discrete] -[[ccr-multiple-datacenter-recovery]] -===== Multiple disaster recovery datacenters -You can replicate data from one datacenter to multiple datacenters. This -configuration provides both disaster recovery and high availability, ensuring -that data is replicated in two datacenters if the primary datacenter is down -or unavailable. - -In the following diagram, data from Datacenter A is replicated to -Datacenter B and Datacenter C, which both have a read-only copy of the leader -index from Datacenter A. - -image::images/ccr-arch-multiple-dcs.png[Production datacenter that replicates data to two other datacenters] - -[discrete] -[[ccr-chained-replication]] -===== Chained replication -You can replicate data across multiple datacenters to form a replication -chain. In the following diagram, Datacenter A contains the leader index. -Datacenter B replicates data from Datacenter A, and Datacenter C replicates -from the follower indices in Datacenter B. The connection between these -datacenters forms a chained replication pattern. - -image::images/ccr-arch-chain-dcs.png[Three datacenters connected to form a replication chain] - -[discrete] -[[ccr-bi-directional-replication]] -===== Bi-directional replication -In a https://www.elastic.co/blog/bi-directional-replication-with-elasticsearch-cross-cluster-replication-ccr[bi-directional replication] setup, all clusters have access to view -all data, and all clusters have an index to write to without manually -implementing failover. Applications can write to the local index within each -datacenter, and read across multiple indices for a global view of all -information. - -This configuration requires no manual intervention when a cluster or datacenter -is unavailable. In the following diagram, if Datacenter A is unavailable, you can continue using Datacenter B without manual failover. When Datacenter A -comes online, replication resumes between the clusters. - -image::images/ccr-arch-bi-directional.png[Bi-directional configuration where each cluster contains both a leader index and follower indices] - -This configuration is particularly useful for index-only workloads, where no updates -to document values occur. In this configuration, documents indexed by {es} are -immutable. Clients are located in each datacenter alongside the {es} -cluster, and do not communicate with clusters in different datacenters. - -[discrete] -[[ccr-data-locality]] -==== Data locality -Bringing data closer to your users or application server can reduce latency -and response time. This methodology also applies when replicating data in {es}. -For example, you can replicate a product catalog or reference dataset to 20 or -more datacenters around the world to minimize the distance between the data and -the application server. - -In the following diagram, data is replicated from one datacenter to three -additional datacenters, each in their own region. 
The central datacenter -contains the leader index, and the additional datacenters contain follower -indices that replicate data in that particular region. This configuration -puts data closer to the application accessing it. - -image::images/ccr-arch-data-locality.png[A centralized datacenter replicated across three other datacenters, each in their own region] - -[discrete] -[[ccr-centralized-reporting]] -==== Centralized reporting -Using a centralized reporting cluster is useful when querying across a large -network is inefficient. In this configuration, you replicate data from many -smaller clusters to the centralized reporting cluster. - -For example, a large global bank might have 100 {es} clusters around the world -that are distributed across different regions for each bank branch. Using -{ccr}, the bank can replicate events from all 100 banks to a central cluster to -analyze and aggregate events locally for reporting. Rather than maintaining a -mirrored cluster, the bank can use {ccr} to replicate specific indices. - -In the following diagram, data from three datacenters in different regions is -replicated to a centralized reporting cluster. This configuration enables you -to copy data from regional hubs to a central cluster, where you can run all -reports locally. - -image::images/ccr-arch-central-reporting.png[Three clusters in different regions sending data to a centralized reporting cluster for analysis] - -[discrete] -[[ccr-replication-mechanics]] -=== Replication mechanics -Although you <> at the index level, {es} -achieves replication at the shard level. When a follower index is created, -each shard in that index pulls changes from its corresponding shard in the -leader index, which means that a follower index has the same number of -shards as its leader index. All operations on the leader are replicated by the -follower, such as operations to create, update, or delete a document. -These requests can be served from any copy of the leader shard (primary or -replica). - -When a follower shard sends a read request, the leader shard responds with -any new operations, limited by the read parameters that you establish when -configuring the follower index. If no new operations are available, the -leader shard waits up to the configured timeout for new operations. If the -timeout elapses, the leader shard responds to the follower shard that there -are no new operations. The follower shard updates shard statistics and -immediately sends another read request to the leader shard. This -communication model ensures that network connections between the remote -cluster and the local cluster are continually in use, avoiding forceful -termination by an external source such as a firewall. - -If a read request fails, the cause of the failure is inspected. If the -cause of the failure is deemed to be recoverable (such as a network -failure), the follower shard enters into a retry loop. Otherwise, the -follower shard pauses -<>. - -[discrete] -[[ccr-update-leader-index]] -==== Processing updates -You can't manually modify a follower index's mappings or aliases. To make -changes, you must update the leader index. Because they are read-only, follower -indices reject writes in all configurations. - -NOTE: Although changes to aliases on the leader index are replicated to follower -indices, write indices are ignored. Follower indices can't accept direct writes, -so if any leader aliases have `is_write_index` set to `true`, that value is -forced to `false`. 
- -For example, you index a document named `doc_1` in Datacenter A, which -replicates to Datacenter B. If a client connects to Datacenter B and attempts -to update `doc_1`, the request fails. To update `doc_1`, the client must -connect to Datacenter A and update the document in the leader index. - -When a follower shard receives operations from the leader shard, it places -those operations in a write buffer. The follower shard submits bulk write -requests using operations from the write buffer. If the write buffer exceeds -its configured limits, no additional read requests are sent. This configuration -provides a back-pressure against read requests, allowing the follower shard -to resume sending read requests when the write buffer is no longer full. - -To manage how operations are replicated from the leader index, you can -configure settings when -<>. - -Changes in the index mapping on the leader index are replicated to the -follower index as soon as possible. This behavior is true for index -settings as well, except for some settings that are local to the leader -index. For example, changing the number of replicas on the leader index is -not replicated by the follower index, so that setting might not be retrieved. - -If you apply a non-dynamic settings change to the leader index that is -needed by the follower index, the follower index closes itself, applies the -settings update, and then re-opens itself. The follower index is unavailable -for reads and cannot replicate writes during this cycle. - -[discrete] -[[ccr-remote-recovery]] -=== Initializing followers using remote recovery -When you create a follower index, you cannot use it until it is fully -initialized. The _remote recovery_ process builds a new copy of a shard on a -follower node by copying data from the primary shard in the leader cluster. - -{es} uses this remote recovery process to bootstrap a follower index using the -data from the leader index. This process provides the follower with a copy of -the current state of the leader index, even if a complete history of changes -is not available on the leader due to Lucene segment merging. - -Remote recovery is a network intensive process that transfers all of the Lucene -segment files from the leader cluster to the follower cluster. The follower -requests that a recovery session be initiated on the primary shard in the -leader cluster. The follower then requests file chunks concurrently from the -leader. By default, the process concurrently requests five 1MB file -chunks. This default behavior is designed to support leader and follower -clusters with high network latency between them. - -TIP: You can modify dynamic <> -to rate-limit the transmitted data and manage the resources consumed by remote -recoveries. - -Use the <> on the cluster containing the follower -index to obtain information about an in-progress remote recovery. Because {es} -implements remote recoveries using the -<> infrastructure, running remote -recoveries are labelled as type `snapshot` in the recovery API. - -[discrete] -[[ccr-leader-requirements]] -=== Replicating a leader requires soft deletes -{ccr-cap} works by replaying the history of individual write -operations that were performed on the shards of the leader index. {es} needs to -retain the -<> on the leader -shards so that they can be pulled by the follower shard tasks. The underlying -mechanism used to retain these operations is _soft deletes_. - -A soft delete occurs whenever an existing document is deleted or updated. 
By -retaining these soft deletes up to configurable limits, the history of -operations can be retained on the leader shards and made available to the -follower shard tasks as it replays the history of operations. - -The <> -setting defines the maximum time to retain a shard history retention lease -before it is considered expired. This setting determines how long the cluster -containing your follower index can be offline, which is 12 hours by default. If -a shard copy recovers after its retention lease expires, but the missing -operations are still available on the leader index, then {es} will establish a -new lease and copy the missing operations. However {es} does not guarantee to -retain unleased operations, so it is also possible that some of the missing -operations have been discarded by the leader and are now completely -unavailable. If this happens then the follower cannot recover automatically so -you must <>. - -Soft deletes must be enabled for indices that you want to use as leader -indices. Soft deletes are enabled by default on new indices created on -or after {es} 7.0.0. - -// tag::ccr-existing-indices-tag[] -IMPORTANT: {ccr-cap} cannot be used on existing indices created using {es} -7.0.0 or earlier, where soft deletes are disabled. You must -<> your data into a new index with soft deletes -enabled. - -// end::ccr-existing-indices-tag[] - -[discrete] -[[ccr-learn-more]] -=== Use {ccr} -This following sections provide more information about how to configure -and use {ccr}: - -* <> -* <> -* <> -* <> - -[discrete] -[[ccr-limitations]] -=== {ccr-cap} limitations -{ccr-cap} is designed to replicate user-generated indices only, and doesn't -currently replicate any of the following: - -* <> -* {ml-docs}/machine-learning-intro.html[Machine learning jobs] -* <> -* <> and -<> polices -* {ref}/mapping-roles.html[User permissions and role mappings] -* <> -* <> -* <> - -If you want to replicate any of this data, you must replicate it to a remote -cluster manually. - -NOTE: Data for <> indices is stored in -the snapshot repository. {ccr-cap} won't replicate these indices completely, even -though they're either partially or fully-cached on the {es} nodes. To achieve -searchable snapshots in a remote cluster, configure snapshot repositories on -the remote cluster and use the same {ilm} policy from the local cluster to move -data into the cold or frozen tiers on the remote cluster. - -include::getting-started.asciidoc[] -include::managing.asciidoc[] -include::auto-follow.asciidoc[] -include::upgrading.asciidoc[] -include::uni-directional-disaster-recovery.asciidoc[] -include::bi-directional-disaster-recovery.asciidoc[] diff --git a/docs/reference/ccr/managing.asciidoc b/docs/reference/ccr/managing.asciidoc deleted file mode 100644 index d7db04cf60f07..0000000000000 --- a/docs/reference/ccr/managing.asciidoc +++ /dev/null @@ -1,140 +0,0 @@ -[role="xpack"] - -[[ccr-managing]] -=== Manage {ccr} -Use the following information to manage {ccr} tasks, such as inspecting -replication progress, pausing and resuming replication, recreating a follower -index, and terminating replication. - -[[ccr-access-ccr]] -To start using {ccr}, access {kib} and go to -*Management > Stack Management*. In the side navigation, select -*Cross-Cluster Replication*. - -[[ccr-inspect-progress]] -==== Inspect replication statistics -To inspect the progress of replication for a follower index and view -detailed shard statistics, <> and choose the *Follower indices* tab. 
- -Select the name of the follower index you want to view replication details -for. The slide-out panel shows settings and replication statistics for the -follower index, including read and write operations that are managed by the -follower shard. - -To view more detailed statistics, click *View in Index Management*, and -then select the name of the follower index in Index Management. -Open the tabs for detailed statistics about the follower index. - -[%collapsible] -.API example -==== -Use the <> to inspect replication -progress at the shard level. This API provides insight into the read and writes -managed by the follower shard. The API also reports read exceptions that can be -retried and fatal exceptions that require user intervention. -==== - -[[ccr-pause-replication]] -==== Pause and resume replication -To pause and resume replication of the leader index, <> and choose the *Follower indices* tab. - -Select the follower index you want to pause and choose *Manage > Pause Replication*. The follower index status changes to Paused. - -To resume replication, select the follower index and choose -*Resume replication*. - -[%collapsible] -.API example -==== -You can pause replication with the -<> and then later resume -replication with the <>. -Using these APIs in tandem enables you to adjust the read and write parameters -on the follower shard task if your initial configuration is not suitable for -your use case. -==== - -[[ccr-recreate-follower-index]] -==== Recreate a follower index -When a document is updated or deleted, the underlying operation is retained in -the Lucene index for a period of time defined by the -<> parameter. You configure -this setting on the <>. - -When a follower index starts, it acquires a retention lease from -the leader index. This lease informs the leader that it should not allow a soft -delete to be pruned until either the follower indicates that it has received -the operation, or until the lease expires. - -If a follower index falls sufficiently behind a leader and cannot -replicate operations, {es} reports an `indices[].fatal_exception` error. To -resolve the issue, recreate the follower index. When the new follow index -starts, the <> process recopies the -Lucene segment files from the leader. - -IMPORTANT: Recreating the follower index is a destructive action. All existing -Lucene segment files are deleted on the cluster containing the follower index. - -To recreate a follower index, -<> and choose the -*Follower indices* tab. - -Select the follower index and pause replication. When the follower index status -changes to Paused, reselect the follower index and choose to unfollow the -leader index. - -The follower index will be converted to a standard index and will no longer -display on the Cross-Cluster Replication page. - -In the side navigation, choose *Index Management*. Select the follower index -from the previous steps and close the follower index. - -You can then <> -to restart the replication process. - -[%collapsible] -.Use the API -==== -Use the <> to pause the replication -process. Then, close the follower index and recreate it. 
For example: - -[source,console] ----------------------------------------------------------------------- -POST /follower_index/_ccr/pause_follow - -POST /follower_index/_close - -PUT /follower_index/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster" : "remote_cluster", - "leader_index" : "leader_index" -} ----------------------------------------------------------------------- -// TEST[setup:remote_cluster_and_leader_index_and_follower_index teardown:pause_follow] -==== - -[[ccr-terminate-replication]] -==== Terminate replication -You can unfollow a leader index to terminate replication and convert the -follower index to a standard index. - -<> and choose the -*Follower indices* tab. - -Select the follower index and pause replication. When the follower index status -changes to Paused, reselect the follower index and choose to unfollow the -leader index. - -The follower index will be converted to a standard index and will no longer -display on the Cross-Cluster Replication page. - -You can then choose *Index Management*, select the follower index -from the previous steps, and close the follower index. - -[%collapsible] -.Use the API -==== -You can terminate replication with the -<>. This API converts a follower index -to a standard (non-follower) index. -==== diff --git a/docs/reference/ccr/uni-directional-disaster-recovery.asciidoc b/docs/reference/ccr/uni-directional-disaster-recovery.asciidoc deleted file mode 100644 index 731fbc0b242c9..0000000000000 --- a/docs/reference/ccr/uni-directional-disaster-recovery.asciidoc +++ /dev/null @@ -1,194 +0,0 @@ -[role="xpack"] -[[ccr-disaster-recovery-uni-directional-tutorial]] -=== Tutorial: Disaster recovery based on uni-directional {ccr} -++++ -Uni-directional disaster recovery -++++ - -//// -[source,console] ----- -PUT kibana_sample_data_ecommerce ----- -// TESTSETUP - -[source,console] ----- -DELETE kibana_sample_data_ecommerce ----- -// TEARDOWN -//// - - -Learn how to failover and failback between two clusters based on uni-directional {ccr}. You can also visit <> to set up replicating data streams that automatically failover and failback without human intervention. - -* Setting up uni-directional {ccr} replicated from `clusterA` -to `clusterB`. -* Failover - If `clusterA` goes offline, `clusterB` needs to "promote" follower -indices to regular indices to allow write operations. All ingestion will need to -be redirected to `clusterB`, this is controlled by the clients ({ls}, {beats}, -{agents}, etc). -* Failback - When `clusterA` is back online, it assumes the role of a follower -and replicates the leader indices from `clusterB`. - -image::images/ccr-uni-directional-disaster-recovery.png[Uni-directional cross cluster replication failover and failback] - -NOTE: {ccr-cap} provides functionality to replicate user-generated indices only. -{ccr-cap} isn't designed for replicating system-generated indices or snapshot -settings, and can't replicate {ilm-init} or {slm-init} policies across clusters. -Learn more in {ccr} <>. - -==== Prerequisites -Before completing this tutorial, -<> to connect two -clusters and configure a follower index. - -In this tutorial, `kibana_sample_data_ecommerce` is replicated from `clusterA` to `clusterB`. 
- -[source,console] ----- -### On clusterB ### -PUT _cluster/settings -{ - "persistent": { - "cluster": { - "remote": { - "clusterA": { - "mode": "proxy", - "skip_unavailable": "true", - "server_name": "clustera.es.region-a.gcp.elastic-cloud.com", - "proxy_socket_connections": "18", - "proxy_address": "clustera.es.region-a.gcp.elastic-cloud.com:9400" - } - } - } - } -} ----- -// TEST[setup:host] -// TEST[s/"server_name": "clustera.es.region-a.gcp.elastic-cloud.com",//] -// TEST[s/"proxy_socket_connections": 18,//] -// TEST[s/clustera.es.region-a.gcp.elastic-cloud.com:9400/\${transport_host}/] -// TEST[s/clusterA/remote_cluster/] - -[source,console] ----- -### On clusterB ### -PUT /kibana_sample_data_ecommerce2/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster": "clusterA", - "leader_index": "kibana_sample_data_ecommerce" -} ----- -// TEST[continued] -// TEST[s/clusterA/remote_cluster/] - -IMPORTANT: Writes (such as ingestion or updates) should occur only on the leader -index. Follower indices are read-only and will reject any writes. - - -==== Failover when `clusterA` is down - -. Promote the follower indices in `clusterB` into regular indices so -that they accept writes. This can be achieved by: -* First, pause indexing following for the follower index. -* Next, close the follower index. -* Unfollow the leader index. -* Finally, open the follower index (which at this point is a regular index). - -+ -[source,console] ----- -### On clusterB ### -POST /kibana_sample_data_ecommerce2/_ccr/pause_follow -POST /kibana_sample_data_ecommerce2/_close -POST /kibana_sample_data_ecommerce2/_ccr/unfollow -POST /kibana_sample_data_ecommerce2/_open ----- -// TEST[continued] - -. On the client side ({ls}, {beats}, {agent}), manually re-enable ingestion of -`kibana_sample_data_ecommerce2` and redirect traffic to the `clusterB`. You should -also redirect all search traffic to the `clusterB` cluster during -this time. You can simulate this by ingesting documents into this index. You should -notice this index is now writable. -+ -[source,console] ----- -### On clusterB ### -POST kibana_sample_data_ecommerce2/_doc/ -{ - "user": "kimchy" -} ----- -// TEST[continued] - -==== Failback when `clusterA` comes back - -When `clusterA` comes back, `clusterB` becomes the new leader and `clusterA` becomes the follower. - -. Set up remote cluster `clusterB` on `clusterA`. -+ -[source,console] ----- -### On clusterA ### -PUT _cluster/settings -{ - "persistent": { - "cluster": { - "remote": { - "clusterB": { - "mode": "proxy", - "skip_unavailable": "true", - "server_name": "clusterb.es.region-b.gcp.elastic-cloud.com", - "proxy_socket_connections": "18", - "proxy_address": "clusterb.es.region-b.gcp.elastic-cloud.com:9400" - } - } - } - } -} ----- -// TEST[setup:host] -// TEST[s/"server_name": "clusterb.es.region-b.gcp.elastic-cloud.com",//] -// TEST[s/"proxy_socket_connections": 18,//] -// TEST[s/clusterb.es.region-b.gcp.elastic-cloud.com:9400/\${transport_host}/] -// TEST[s/clusterB/remote_cluster/] - -. Existing data needs to be discarded before you can turn any index into a -follower. Ensure the most up-to-date data is available on `clusterB` prior to -deleting any indices on `clusterA`. -+ -[source,console] ----- -### On clusterA ### -DELETE kibana_sample_data_ecommerce ----- -// TEST[skip:need dual cluster setup] - - -. Create a follower index on `clusterA`, now following the leader index in -`clusterB`. 
-+ -[source,console] ----- -### On clusterA ### -PUT /kibana_sample_data_ecommerce/_ccr/follow?wait_for_active_shards=1 -{ - "remote_cluster": "clusterB", - "leader_index": "kibana_sample_data_ecommerce2" -} ----- -// TEST[continued] -// TEST[s/clusterB/remote_cluster/] - -. The index on the follower cluster now contains the updated documents. -+ -[source,console] ----- -### On clusterA ### -GET kibana_sample_data_ecommerce/_search?q=kimchy ----- -// TEST[continued] -+ -TIP: If a soft delete is merged away before it can be replicated to a follower the following process will fail due to incomplete history on the leader, see <> for more details. diff --git a/docs/reference/ccr/upgrading.asciidoc b/docs/reference/ccr/upgrading.asciidoc deleted file mode 100644 index 6976042872260..0000000000000 --- a/docs/reference/ccr/upgrading.asciidoc +++ /dev/null @@ -1,66 +0,0 @@ -[role="xpack"] -[[ccr-upgrading]] -=== Upgrading clusters using {ccr} -++++ -Upgrading clusters -++++ - -Clusters that are actively using {ccr} require a careful approach to upgrades. -The following conditions could cause index following to fail during rolling -upgrades: - -* Clusters that have not yet been upgraded will reject new index settings or -mapping types that are replicated from an upgraded cluster. -* Nodes in a cluster that has not been upgraded will reject index files from a -node in an upgraded cluster when index following tries to fall back to -file-based recovery. This limitation is due to Lucene not being forward -compatible. - -The approach to running a rolling upgrade on clusters where {ccr} is -enabled differs based on uni-directional and bi-directional index following. - -[[ccr-uni-directional-upgrade]] -==== Uni-directional index following -In a uni-directional configuration, one cluster contains only -leader indices, and the other cluster contains only follower indices that -replicate the leader indices. - -In this strategy, the cluster with follower indices should be upgraded -first and the cluster with leader indices should be upgraded last. -Upgrading the clusters in this order ensures that index following can continue -during the upgrade without downtime. - -You can also use this strategy to upgrade a -<>. Start by upgrading clusters at -the end of the chain and working your way back to the cluster that contains the -leader indices. - -For example, consider a configuration where Cluster A contains all leader -indices. Cluster B follows indices in Cluster A, and Cluster C follows indices -in Cluster B. - --- - Cluster A - ^--Cluster B - ^--Cluster C --- - -In this configuration, upgrade the clusters in the following order: - -. Cluster C -. Cluster B -. Cluster A - -[[ccr-bi-directional-upgrade]] -==== Bi-directional index following - -In a bi-directional configuration, each cluster contains both leader and -follower indices. - -When upgrading clusters in this configuration, -<> and -<> prior to -upgrading both clusters. - -After upgrading both clusters, resume index following and resume replication -of auto-follow patterns. diff --git a/docs/reference/cluster.asciidoc b/docs/reference/cluster.asciidoc deleted file mode 100644 index 398ece616fe07..0000000000000 --- a/docs/reference/cluster.asciidoc +++ /dev/null @@ -1,131 +0,0 @@ -[[cluster]] -== Cluster APIs - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. 
--- - -["float",id="cluster-nodes"] -=== Node specification - -Some cluster-level APIs may operate on a subset of the nodes which can be -specified with _node filters_. For example, the <>, -<>, and <> APIs -can all report results from a filtered set of nodes rather than from all nodes. - -_Node filters_ are written as a comma-separated list of individual filters, -each of which adds or removes nodes from the chosen subset. Each filter can be -one of the following: - -* `_all`, to add all nodes to the subset. -* `_local`, to add the local node to the subset. -* `_master`, to add the currently-elected master node to the subset. -* a node id or name, to add this node to the subset. -* an IP address or hostname, to add all matching nodes to the subset. -* a pattern, using `*` wildcards, which adds all nodes to the subset - whose name, address or hostname matches the pattern. -* `master:true`, `data:true`, `ingest:true`, `voting_only:true`, `ml:true`, or - `coordinating_only:true`, which respectively add to the subset all - master-eligible nodes, all data nodes, all ingest nodes, all voting-only - nodes, all machine learning nodes, and all coordinating-only nodes. -* `master:false`, `data:false`, `ingest:false`, `voting_only:false`, `ml:false`, - or `coordinating_only:false`, which respectively remove from the subset all - master-eligible nodes, all data nodes, all ingest nodes, all voting-only - nodes, all machine learning nodes, and all coordinating-only nodes. -* a pair of patterns, using `*` wildcards, of the form `attrname:attrvalue`, - which adds to the subset all nodes with a <> whose name - and value match the respective patterns. Custom node attributes are - configured by setting properties in the configuration file of the form - `node.attr.attrname: attrvalue`. - -NOTE: node filters run in the order in which they are given, which is important -if using filters that remove nodes from the set. For example -`_all,master:false` means all the nodes except the master-eligible ones, but -`master:false,_all` means the same as `_all` because the `_all` filter runs -after the `master:false` filter. - -NOTE: if no filters are given, the default is to select all nodes. However, if -any filters are given then they run starting with an empty chosen subset. This -means that filters such as `master:false` which remove nodes from the chosen -subset are only useful if they come after some other filters. When used on its -own, `master:false` selects no nodes. - -Here are some examples of the use of node filters with the -<> APIs. - -[source,console] --------------------------------------------------- -# If no filters are given, the default is to select all nodes -GET /_nodes -# Explicitly select all nodes -GET /_nodes/_all -# Select just the local node -GET /_nodes/_local -# Select the elected master node -GET /_nodes/_master -# Select nodes by name, which can include wildcards -GET /_nodes/node_name_goes_here -GET /_nodes/node_name_goes_* -# Select nodes by address, which can include wildcards -GET /_nodes/10.0.0.3,10.0.0.4 -GET /_nodes/10.0.0.* -# Select nodes by role -GET /_nodes/_all,master:false -GET /_nodes/data:true,ingest:true -GET /_nodes/coordinating_only:true -GET /_nodes/master:true,voting_only:false -# Select nodes by custom attribute (e.g. 
with something like `node.attr.rack: 2` in the configuration file) -GET /_nodes/rack:2 -GET /_nodes/ra*:2 -GET /_nodes/ra*:2* --------------------------------------------------- - -include::cluster/allocation-explain.asciidoc[] - -include::cluster/get-settings.asciidoc[] - -include::cluster/health.asciidoc[] - -include::health/health.asciidoc[] - -include::cluster/reroute.asciidoc[] - -include::cluster/state.asciidoc[] - -include::cluster/stats.asciidoc[] - -include::cluster/update-settings.asciidoc[] - -include::cluster/nodes-usage.asciidoc[] - -include::cluster/nodes-hot-threads.asciidoc[] - -include::cluster/nodes-info.asciidoc[] - -include::cluster/prevalidate-node-removal.asciidoc[] - -include::cluster/nodes-reload-secure-settings.asciidoc[] - -include::cluster/nodes-stats.asciidoc[] - -include::cluster/cluster-info.asciidoc[] - -include::cluster/pending.asciidoc[] - -include::cluster/remote-info.asciidoc[] - -include::cluster/tasks.asciidoc[] - -include::cluster/voting-exclusions.asciidoc[] - -include::cluster/update-desired-nodes.asciidoc[] - -include::cluster/get-desired-nodes.asciidoc[] - -include::cluster/delete-desired-nodes.asciidoc[] - -include::cluster/get-desired-balance.asciidoc[] - -include::cluster/delete-desired-balance.asciidoc[] diff --git a/docs/reference/cluster/allocation-explain.asciidoc b/docs/reference/cluster/allocation-explain.asciidoc deleted file mode 100644 index 7099163cc98e9..0000000000000 --- a/docs/reference/cluster/allocation-explain.asciidoc +++ /dev/null @@ -1,462 +0,0 @@ -[[cluster-allocation-explain]] -=== Cluster allocation explain API -++++ -Cluster allocation explain -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Provides an explanation for a shard's current <>. - -[source,console] ----- -GET _cluster/allocation/explain -{ - "index": "my-index-000001", - "shard": 0, - "primary": false, - "current_node": "my-node" -} ----- -// TEST[setup:my_index] -// TEST[s/"primary": false,/"primary": false/] -// TEST[s/"current_node": "my-node"//] - -[[cluster-allocation-explain-api-request]] -==== {api-request-title} - -`GET _cluster/allocation/explain` - -`POST _cluster/allocation/explain` - -[[cluster-allocation-explain-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cluster-allocation-explain-api-desc]] -==== {api-description-title} - -The purpose of the cluster allocation explain API is to provide -explanations for shard allocations in the cluster. For unassigned shards, -the explain API provides an explanation for why the shard is unassigned. -For assigned shards, the explain API provides an explanation for why the -shard is remaining on its current node and has not moved or rebalanced to -another node. This API can be very useful when attempting to diagnose why a -shard is unassigned or why a shard continues to remain on its current node when -you might expect otherwise. - -[[cluster-allocation-explain-api-query-params]] -==== {api-query-parms-title} - -`include_disk_info`:: - (Optional, Boolean) If `true`, returns information about disk usage and - shard sizes. Defaults to `false`. - -`include_yes_decisions`:: - (Optional, Boolean) If `true`, returns 'YES' decisions in explanation. - Defaults to `false`. 
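As an illustrative sketch only (it assumes the `my-index-000001` index used in the examples below), both query parameters can be combined with a request body in a single call:

[source,console]
----
GET _cluster/allocation/explain?include_disk_info=true&include_yes_decisions=true
{
  "index": "my-index-000001",
  "shard": 0,
  "primary": true
}
----

With `include_yes_decisions=true`, the node-by-node explanation also lists the deciders that returned `YES`, which can make the responses shown below considerably longer.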
- -[[cluster-allocation-explain-api-request-body]] -==== {api-request-body-title} - -`current_node`:: - (Optional, string) Specifies the node ID or the name of the node currently - holding the shard to explain. To explain an unassigned shard, omit this - parameter. - -`index`:: - (Optional, string) Specifies the name of the index that you would like an - explanation for. - -`primary`:: - (Optional, Boolean) If `true`, returns explanation for the primary shard - for the given shard ID. - -`shard`:: - (Optional, integer) Specifies the ID of the shard that you would like an - explanation for. - -[[cluster-allocation-explain-api-examples]] -==== {api-examples-title} - -===== Unassigned primary shard - -====== Conflicting settings -The following request gets an allocation explanation for an unassigned primary -shard. - -//// -[source,console] ----- -PUT my-index-000001?master_timeout=1s&timeout=1s -{ - "settings": { - "index.routing.allocation.include._name": "nonexistent_node", - "index.routing.allocation.include._tier_preference": null - } -} ----- -//// - -[source,console] ----- -GET _cluster/allocation/explain -{ - "index": "my-index-000001", - "shard": 0, - "primary": true -} ----- -// TEST[continued] - -The API response indicates the shard can only be allocated to a nonexistent -node. - -[source,console-result] ----- -{ - "index" : "my-index-000001", - "shard" : 0, - "primary" : true, - "current_state" : "unassigned", <1> - "unassigned_info" : { - "reason" : "INDEX_CREATED", <2> - "at" : "2017-01-04T18:08:16.600Z", - "last_allocation_status" : "no" - }, - "can_allocate" : "no", <3> - "allocate_explanation" : "Elasticsearch isn't allowed to allocate this shard to any of the nodes in the cluster. Choose a node to which you expect this shard to be allocated, find this node in the node-by-node explanation, and address the reasons which prevent Elasticsearch from allocating this shard there.", - "node_allocation_decisions" : [ - { - "node_id" : "8qt2rY-pT6KNZB3-hGfLnw", - "node_name" : "node-0", - "transport_address" : "127.0.0.1:9401", - "roles" : ["data", "data_cold", "data_content", "data_frozen", "data_hot", "data_warm", "ingest", "master", "ml", "remote_cluster_client", "transform"], - "node_attributes" : {}, - "node_decision" : "no", <4> - "weight_ranking" : 1, - "deciders" : [ - { - "decider" : "filter", <5> - "decision" : "NO", - "explanation" : "node does not match index setting [index.routing.allocation.include] filters [_name:\"nonexistent_node\"]" <6> - } - ] - } - ] -} ----- -// TESTRESPONSE[s/"at" : "[^"]*"/"at" : $body.$_path/] -// TESTRESPONSE[s/"node_id" : "[^"]*"/"node_id" : $body.$_path/] -// TESTRESPONSE[s/"transport_address" : "[^"]*"/"transport_address" : $body.$_path/] -// TESTRESPONSE[s/"roles" : \[("[a-z_]*",)*("[a-z_]*")\]/"roles" : $body.$_path/] -// TESTRESPONSE[s/"node_attributes" : \{\}/"node_attributes" : $body.$_path/] - -<1> The current state of the shard. -<2> The reason for the shard originally becoming unassigned. -<3> Whether to allocate the shard. -<4> Whether to allocate the shard to the particular node. -<5> The decider which led to the `no` decision for the node. -<6> An explanation as to why the decider returned a `no` decision, with a helpful hint pointing to the setting that led to the decision. In this example, a newly created index has <> that requires that it only be allocated to a node named `nonexistent_node`, which does not exist, so the index is unable to allocate. 
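In this particular example, one way to address the `no` decision is to remove the misconfigured allocation filter so the deciders can select an eligible node. The following is a minimal sketch (assuming the `my-index-000001` example index above); resetting the setting to `null` restores its default:

[source,console]
----
PUT my-index-000001/_settings
{
  "index.routing.allocation.include._name": null
}
----

After the filter is cleared, re-running the allocation explain request should no longer report the `filter` decider as the blocker, although other deciders may still apply.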
- -See https://www.youtube.com/watch?v=5z3n2VgusLE[this video] for a walkthrough of troubleshooting a node and index setting mismatch. - -[[maximum-number-of-retries-exceeded]] -====== Maximum number of retries exceeded - -The following response contains an allocation explanation for an unassigned -primary shard that has reached the maximum number of allocation retry attempts. - -[source,js] ----- -{ - "index" : "my-index-000001", - "shard" : 0, - "primary" : true, - "current_state" : "unassigned", - "unassigned_info" : { - "at" : "2017-01-04T18:03:28.464Z", - "failed shard on node [mEKjwwzLT1yJVb8UxT6anw]: failed recovery, failure RecoveryFailedException", - "reason": "ALLOCATION_FAILED", - "failed_allocation_attempts": 5, - "last_allocation_status": "no", - }, - "can_allocate": "no", - "allocate_explanation": "cannot allocate because allocation is not permitted to any of the nodes", - "node_allocation_decisions" : [ - { - "node_id" : "3sULLVJrRneSg0EfBB-2Ew", - "node_name" : "node_t0", - "transport_address" : "127.0.0.1:9400", - "roles" : ["data_content", "data_hot"], - "node_decision" : "no", - "store" : { - "matching_size" : "4.2kb", - "matching_size_in_bytes" : 4325 - }, - "deciders" : [ - { - "decider": "max_retry", - "decision" : "NO", - "explanation": "shard has exceeded the maximum number of retries [5] on failed allocation attempts - manually call [POST /_cluster/reroute?retry_failed] to retry, [unassigned_info[[reason=ALLOCATION_FAILED], at[2024-07-30T21:04:12.166Z], failed_attempts[5], failed_nodes[[mEKjwwzLT1yJVb8UxT6anw]], delayed=false, details[failed shard on node [mEKjwwzLT1yJVb8UxT6anw]: failed recovery, failure RecoveryFailedException], allocation_status[deciders_no]]]" - } - ] - } - ] -} ----- -// NOTCONSOLE -When Elasticsearch is unable to allocate a shard, it will attempt to retry allocation up to -the maximum number of retries allowed. After this, Elasticsearch will stop attempting to -allocate the shard in order to prevent infinite retries which may impact cluster -performance. Run the <> API to retry allocation, which -will allocate the shard if the issue preventing allocation has been resolved. - -[[no-valid-shard-copy]] -====== No valid shard copy - -The following response contains an allocation explanation for an unassigned -primary shard that was previously allocated. - -[source,js] ----- -{ - "index" : "my-index-000001", - "shard" : 0, - "primary" : true, - "current_state" : "unassigned", - "unassigned_info" : { - "reason" : "NODE_LEFT", - "at" : "2017-01-04T18:03:28.464Z", - "details" : "node_left[OIWe8UhhThCK0V5XfmdrmQ]", - "last_allocation_status" : "no_valid_shard_copy" - }, - "can_allocate" : "no_valid_shard_copy", - "allocate_explanation" : "Elasticsearch can't allocate this shard because there are no copies of its data in the cluster. Elasticsearch will allocate this shard when a node holding a good copy of its data joins the cluster. If no such node is available, restore this index from a recent snapshot." -} ----- -// NOTCONSOLE - -If a shard is unassigned with an allocation status of `no_valid_shard_copy`, then you should <>. If all the nodes containing in-sync copies of a shard are lost, then you can <>. - -See https://www.youtube.com/watch?v=6OAg9IyXFO4[this video] for a walkthrough of troubleshooting `no_valid_shard_copy`. - -===== Unassigned replica shard - -====== Allocation delayed - -The following response contains an allocation explanation for a replica that's -unassigned due to <>. 
- -[source,js] ----- -{ - "index" : "my-index-000001", - "shard" : 0, - "primary" : false, - "current_state" : "unassigned", - "unassigned_info" : { - "reason" : "NODE_LEFT", - "at" : "2017-01-04T18:53:59.498Z", - "details" : "node_left[G92ZwuuaRY-9n8_tc-IzEg]", - "last_allocation_status" : "no_attempt" - }, - "can_allocate" : "allocation_delayed", - "allocate_explanation" : "The node containing this shard copy recently left the cluster. Elasticsearch is waiting for it to return. If the node does not return within [%s] then Elasticsearch will allocate this shard to another node. Please wait.", - "configured_delay" : "1m", <1> - "configured_delay_in_millis" : 60000, - "remaining_delay" : "59.8s", <2> - "remaining_delay_in_millis" : 59824, - "node_allocation_decisions" : [ - { - "node_id" : "pmnHu_ooQWCPEFobZGbpWw", - "node_name" : "node_t2", - "transport_address" : "127.0.0.1:9402", - "roles" : ["data_content", "data_hot"], - "node_decision" : "yes" - }, - { - "node_id" : "3sULLVJrRneSg0EfBB-2Ew", - "node_name" : "node_t0", - "transport_address" : "127.0.0.1:9400", - "roles" : ["data_content", "data_hot"], - "node_decision" : "no", - "store" : { <3> - "matching_size" : "4.2kb", - "matching_size_in_bytes" : 4325 - }, - "deciders" : [ - { - "decider" : "same_shard", - "decision" : "NO", - "explanation" : "a copy of this shard is already allocated to this node [[my-index-000001][0], node[3sULLVJrRneSg0EfBB-2Ew], [P], s[STARTED], a[id=eV9P8BN1QPqRc3B4PLx6cg]]" - } - ] - } - ] -} ----- -// NOTCONSOLE - -<1> The configured delay before allocating a replica shard that does not exist due to the node holding it leaving the cluster. -<2> The remaining delay before allocating the replica shard. -<3> Information about the shard data found on a node. - -====== Allocation throttled - -The following response contains an allocation explanation for a replica that's -queued to allocate but currently waiting on other queued shards. - -[source,js] ----- -{ - "index" : "my-index-000001", - "shard" : 0, - "primary" : false, - "current_state" : "unassigned", - "unassigned_info" : { - "reason" : "NODE_LEFT", - "at" : "2017-01-04T18:53:59.498Z", - "details" : "node_left[G92ZwuuaRY-9n8_tc-IzEg]", - "last_allocation_status" : "no_attempt" - }, - "can_allocate": "throttled", - "allocate_explanation": "Elasticsearch is currently busy with other activities. It expects to be able to allocate this shard when those activities finish. Please wait.", - "node_allocation_decisions" : [ - { - "node_id" : "3sULLVJrRneSg0EfBB-2Ew", - "node_name" : "node_t0", - "transport_address" : "127.0.0.1:9400", - "roles" : ["data_content", "data_hot"], - "node_decision" : "no", - "deciders" : [ - { - "decider": "throttling", - "decision": "THROTTLE", - "explanation": "reached the limit of incoming shard recoveries [2], cluster setting [cluster.routing.allocation.node_concurrent_incoming_recoveries=2] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])" - } - ] - } - ] -} ----- -// NOTCONSOLE - -This is a transient message that might appear when a large amount of shards are allocating. - -===== Assigned shard - -====== Cannot remain on current node - -The following response contains an allocation explanation for an assigned shard. -The response indicates the shard is not allowed to remain on its current node -and must be reallocated. 
- -[source,js] ----- -{ - "index" : "my-index-000001", - "shard" : 0, - "primary" : true, - "current_state" : "started", - "current_node" : { - "id" : "8lWJeJ7tSoui0bxrwuNhTA", - "name" : "node_t1", - "transport_address" : "127.0.0.1:9401", - "roles" : ["data_content", "data_hot"] - }, - "can_remain_on_current_node" : "no", <1> - "can_remain_decisions" : [ <2> - { - "decider" : "filter", - "decision" : "NO", - "explanation" : "node does not match index setting [index.routing.allocation.include] filters [_name:\"nonexistent_node\"]" - } - ], - "can_move_to_other_node" : "no", <3> - "move_explanation" : "This shard may not remain on its current node, but Elasticsearch isn't allowed to move it to another node. Choose a node to which you expect this shard to be allocated, find this node in the node-by-node explanation, and address the reasons which prevent Elasticsearch from allocating this shard there.", - "node_allocation_decisions" : [ - { - "node_id" : "_P8olZS8Twax9u6ioN-GGA", - "node_name" : "node_t0", - "transport_address" : "127.0.0.1:9400", - "roles" : ["data_content", "data_hot"], - "node_decision" : "no", - "weight_ranking" : 1, - "deciders" : [ - { - "decider" : "filter", - "decision" : "NO", - "explanation" : "node does not match index setting [index.routing.allocation.include] filters [_name:\"nonexistent_node\"]" - } - ] - } - ] -} ----- -// NOTCONSOLE - -<1> Whether the shard is allowed to remain on its current node. -<2> The deciders that factored into the decision of why the shard is not allowed to remain on its current node. -<3> Whether the shard is allowed to be allocated to another node. - -====== Must remain on current node - -The following response contains an allocation explanation for a shard that must -remain on its current node. Moving the shard to another node would not improve -cluster balance. - -[source,js] ----- -{ - "index" : "my-index-000001", - "shard" : 0, - "primary" : true, - "current_state" : "started", - "current_node" : { - "id" : "wLzJm4N4RymDkBYxwWoJsg", - "name" : "node_t0", - "transport_address" : "127.0.0.1:9400", - "roles" : ["data_content", "data_hot"], - "weight_ranking" : 1 - }, - "can_remain_on_current_node" : "yes", - "can_rebalance_cluster" : "yes", <1> - "can_rebalance_to_other_node" : "no", <2> - "rebalance_explanation" : "Elasticsearch cannot rebalance this shard to another node since there is no node to which allocation is permitted which would improve the cluster balance. If you expect this shard to be rebalanced to another node, find this node in the node-by-node explanation and address the reasons which prevent Elasticsearch from rebalancing this shard there.", - "node_allocation_decisions" : [ - { - "node_id" : "oE3EGFc8QN-Tdi5FFEprIA", - "node_name" : "node_t1", - "transport_address" : "127.0.0.1:9401", - "roles" : ["data_content", "data_hot"], - "node_decision" : "worse_balance", <3> - "weight_ranking" : 1 - } - ] -} ----- -// NOTCONSOLE - -<1> Whether rebalancing is allowed on the cluster. -<2> Whether the shard can be rebalanced to another node. -<3> The reason the shard cannot be rebalanced to the node, in this case indicating that it offers no better balance than the current node. - -===== No arguments - -If you call the API with no arguments, {es} retrieves an allocation explanation -for an arbitrary unassigned primary or replica shard, returning any unassigned primary shards first. 
- -[source,console] ----- -GET _cluster/allocation/explain ----- -// TEST[catch:bad_request] - -If the cluster contains no unassigned shards, the API returns a `400` error. diff --git a/docs/reference/cluster/cluster-info.asciidoc b/docs/reference/cluster/cluster-info.asciidoc deleted file mode 100644 index 52ae637d8f46c..0000000000000 --- a/docs/reference/cluster/cluster-info.asciidoc +++ /dev/null @@ -1,406 +0,0 @@ -[[cluster-info]] -=== Cluster Info API - -experimental::[] - -++++ -Cluster Info -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Returns cluster information. - -[[cluster-info-api-request]] -==== {api-request-title} - -`GET /_info/` + - -[[cluster-info-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - - -[[cluster-info-api-desc]] -==== {api-description-title} - -You can use the Cluster Info API to retrieve information of a cluster. - - -[[cluster-info-api-path-params]] -==== {api-path-parms-title} - - -``:: -(String) Limits the information returned to the specific `target`. -A comma-separated list of the following options: -+ --- -`_all`:: -All the information available. Can not be mixed with other targets. - -`http`:: -HTTP connection information. - -`ingest`:: -Ingest information. - -`thread_pool`:: -Statistics about each thread pool, including current size, queue size and rejected tasks. - -`script`:: -Contains script statistics of the cluster. --- - -[role="child_attributes"] -[[cluster-info-api-response-body]] -==== {api-response-body-title} - -`cluster_name`:: -(string) -Name of the cluster. Based on the <> setting. - - -[[cluster-info-api-response-body-http]] -`http`:: -(object) -Contains http information for the cluster. -+ -.Properties of `http` -[%collapsible%open] -====== -`current_open`:: -(integer) -Current number of open HTTP connections for the cluster. - -`total_opened`:: -(integer) -Total number of HTTP connections opened for the cluster. - -`clients`:: -(array of objects) -Information on current and recently-closed HTTP client connections. -Clients that have been closed longer than the <> -setting will not be represented here. -+ -.Properties of `clients` -[%collapsible%open] -======= -`id`:: -(integer) -Unique ID for the HTTP client. - -`agent`:: -(string) -Reported agent for the HTTP client. If unavailable, this property is not -included in the response. - -`local_address`:: -(string) -Local address for the HTTP connection. - -`remote_address`:: -(string) -Remote address for the HTTP connection. - -`last_uri`:: -(string) -The URI of the client's most recent request. - -`x_forwarded_for`:: -(string) -Value from the client's `x-forwarded-for` HTTP header. If unavailable, this -property is not included in the response. - -`x_opaque_id`:: -(string) -Value from the client's `x-opaque-id` HTTP header. If unavailable, this property -is not included in the response. - -`opened_time_millis`:: -(integer) -Time at which the client opened the connection. - -`closed_time_millis`:: -(integer) -Time at which the client closed the connection if the connection is closed. - -`last_request_time_millis`:: -(integer) -Time of the most recent request from this client. - -`request_count`:: -(integer) -Number of requests from this client. - -`request_size_bytes`:: -(integer) -Cumulative size in bytes of all requests from this client. 
-======= -====== - - -[[cluster-info-api-response-body-ingest]] -`ingest`:: -(object) -Contains ingest information for the cluster. -+ -.Properties of `ingest` -[%collapsible%open] -====== -`total`:: -(object) -Contains information about ingest operations for the cluster. -+ -.Properties of `total` -[%collapsible%open] -======= -`count`:: -(integer) -Total number of documents ingested across the cluster. - -`time`:: -(<>) -Total time spent preprocessing ingest documents across the cluster. - -`time_in_millis`:: -(integer) -Total time, in milliseconds, spent preprocessing ingest documents across the cluster. - -`current`:: -(integer) -Total number of documents currently being ingested. - -`failed`:: -(integer) -Total number of failed ingest operations across the cluster. -======= - -`pipelines`:: -(object) -Contains information about ingest pipelines for the cluster. -+ -.Properties of `pipelines` -[%collapsible%open] -======= -``:: -(object) -Contains information about the ingest pipeline. -+ -.Properties of `` -[%collapsible%open] -======== -`count`:: -(integer) -Number of documents preprocessed by the ingest pipeline. - -`time`:: -(<>) -Total time spent preprocessing documents in the ingest pipeline. - -`time_in_millis`:: -(integer) -Total time, in milliseconds, spent preprocessing documents in the ingest -pipeline. - -`failed`:: -(integer) -Total number of failed operations for the ingest pipeline. - -`ingested_as_first_pipeline`:: -(<>) -Total ingested size of all documents which were first processed by this pipeline. -A document is not added to the stat value for this pipeline if it is a final pipeline after a default pipeline, a pipeline -run after a reroute processor, or is within a pipeline processor. -Instead, the document size is added to the stat value of the pipeline which initially ingested the document. - -`ingested_as_first_pipeline_in_bytes`:: -(integer) -Total ingested size, in bytes, of all documents which were first processed by this pipeline. -A document is not added to the stat value for this pipeline if it is a final pipeline after a default pipeline, a pipeline -run after a reroute processor, or is within a pipeline processor. -Instead, the document size is added to the stat value of the pipeline which initially ingested the document. - -`produced_as_first_pipeline`:: -(<>) -Total produced size of all documents which were first processed by this pipeline. -A document is not added to the stat value for this pipeline if it is a final pipeline after a default pipeline, a pipeline -run after a reroute processor, or is within a pipeline processor. -Instead, the document size is added to the stat value of the pipeline which initially ingested the document. - -`produced_as_first_pipeline_in_bytes`:: -(integer) -Total produced size, in bytes, of all documents which were first processed by this pipeline. -A document is not added to the stat value for this pipeline if it is a final pipeline after a default pipeline, a pipeline -run after a reroute processor, or is within a pipeline processor. -Instead, the document size is added to the stat value of the pipeline which initially ingested the document. - -`processors`:: -(array of objects) -Contains information for the ingest processors for the ingest pipeline. -+ -.Properties of `processors` -[%collapsible%open] -========= -``:: -(object) -Contains information for the ingest processor. -+ -.Properties of `` -[%collapsible%open] -========== -`count`:: -(integer) -Number of documents transformed by the processor. 
- -`time`:: -(<>) -Time spent by the processor transforming documents. - -`time_in_millis`:: -(integer) -Time, in milliseconds, spent by the processor transforming documents. - -`current`:: -(integer) -Number of documents currently being transformed by the processor. - -`failed`:: -(integer) -Number of failed operations for the processor. -========== -========= -======== -======= -====== - - -[[cluster-info-api-response-body-threadpool]] -`thread_pool`:: -(object) -Contains information about the thread pools of the cluster. -+ -.Properties of `thread_pool` -[%collapsible%open] -====== -``:: -(object) -Contains information about the thread pool of the cluster with name ``. -+ -.Properties of `` -[%collapsible%open] -======= -`threads`:: -(integer) -Number of threads in the thread pool. - -`queue`:: -(integer) -Number of tasks in queue for the thread pool. - -`active`:: -(integer) -Number of active threads in the thread pool. - -`rejected`:: -(integer) -Number of tasks rejected by the thread pool executor. - -`largest`:: -(integer) -Highest number of active threads in the thread pool. - -`completed`:: -(integer) -Number of tasks completed by the thread pool executor. -======= -====== - -[[cluster-info-api-response-body-script]] -`script`:: -(object) -Contains script statistics of the cluster. -+ -.Properties of `script` -[%collapsible%open] -====== -`compilations`:: -(integer) -Total number of inline script compilations performed by the cluster. - -`compilations_history`:: -(object) -Contains the recent history of script compilations. - -.Properties of `compilations_history` -[%collapsible%open] -======= -`5m`:: -(long) -The number of script compilations in the last five minutes. -`15m`:: -(long) -The number of script compilations in the last fifteen minutes. -`24h`:: -(long) -The number of script compilations in the last twenty-four hours. -======= - -`cache_evictions`:: -(integer) -Total number of times the script cache has evicted old data. - - -`cache_evictions_history`:: -(object) -Contains the recent history of script cache evictions. - -.Properties of `cache_evictions` -[%collapsible%open] -======= -`5m`:: -(long) -The number of script cache evictions in the last five minutes. -`15m`:: -(long) -The number of script cache evictions in the last fifteen minutes. -`24h`:: -(long) -The number of script cache evictions in the last twenty-four hours. -======= - -`compilation_limit_triggered`:: -(integer) -Total number of times the <> circuit breaker has limited inline script compilations. -====== - -[[cluster-info-api-example]] -==== {api-examples-title} - -[source,console] ----- -# returns all stats info of the cluster -GET /_info/_all - -# returns the http info of the cluster -GET /_info/http - -# returns the http info of the cluster -GET /_info/ingest - -# returns the thread_pool info of the cluster -GET /_info/thread_pool - -# returns the script info of the cluster -GET /_info/script - -# returns the http and ingest info of the cluster -GET /_info/http,ingest ----- diff --git a/docs/reference/cluster/delete-desired-balance.asciidoc b/docs/reference/cluster/delete-desired-balance.asciidoc deleted file mode 100644 index 45fa147258b78..0000000000000 --- a/docs/reference/cluster/delete-desired-balance.asciidoc +++ /dev/null @@ -1,30 +0,0 @@ -[[delete-desired-balance]] -=== Reset desired balance API -++++ -Reset desired balance -++++ - -NOTE: {cloud-only} - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. 
--- - -Discards the current <> and computes a new desired balance starting from the current allocation of shards. -This can sometimes help {es} find a desired balance which needs fewer shard movements to achieve, especially if the -cluster has experienced changes so substantial that the current desired balance is no longer optimal without {es} having -detected that the current desired balance will take more shard movements to achieve than needed. However, this API -imposes a heavy load on the elected master node and may not always have the expected effect. Calling this API should -never be necessary. Consider instead <> to avoid excessive shard movements. - -[[delete-desired-balance-request]] -==== {api-request-title} - -[source,console] --------------------------------------------------- -DELETE /_internal/desired_balance --------------------------------------------------- -// TEST[skip:Can't reliably test desired balance] diff --git a/docs/reference/cluster/delete-desired-nodes.asciidoc b/docs/reference/cluster/delete-desired-nodes.asciidoc deleted file mode 100644 index 883bc22f6d964..0000000000000 --- a/docs/reference/cluster/delete-desired-nodes.asciidoc +++ /dev/null @@ -1,64 +0,0 @@ -[[delete-desired-nodes]] -=== Delete desired nodes API -++++ -Delete desired nodes -++++ - -NOTE: {cloud-only} - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Delete desired nodes. - -[[delete-desired-nodes-request]] -==== {api-request-title} - -////////////////////////// -[source,console] --------------------------------------------------- -PUT /_internal/desired_nodes/history/1 -{ - "nodes" : [ - { - "settings" : { - "node.name" : "instance-000187", - "node.external_id": "instance-000187", - "node.roles" : ["data_hot", "master"], - "node.attr.data" : "hot", - "node.attr.logical_availability_zone" : "zone-0" - }, - "processors" : 8.0, - "memory" : "58gb", - "storage" : "2tb" - } - ] -} --------------------------------------------------- -// TESTSETUP -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_internal/desired_nodes --------------------------------------------------- -// TEST - -[[delete-desired-nodes-desc]] -==== {api-description-title} - -This API deletes the desired nodes. - -[[delete-desired-nodes-examples]] -==== {api-examples-title} - -This example deletes the current desired nodes. - -[source,console] --------------------------------------------------- -DELETE /_internal/desired_nodes --------------------------------------------------- -// TEST diff --git a/docs/reference/cluster/get-desired-balance.asciidoc b/docs/reference/cluster/get-desired-balance.asciidoc deleted file mode 100644 index 3c6b1dc48719c..0000000000000 --- a/docs/reference/cluster/get-desired-balance.asciidoc +++ /dev/null @@ -1,248 +0,0 @@ -[[get-desired-balance]] -=== Get desired balance API -++++ -Get desired balance -++++ - -NOTE: {cloud-only} - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. 
--- - -Exposes: - -* the <> computation and reconciliation stats -* balancing stats such as distribution of shards, disk and ingest forecasts - across nodes and data tiers (based on the current cluster state) -* routing table with each shard current and desired location -* cluster info with nodes disk usages - -[[get-desired-balance-request]] -==== {api-request-title} - -[source,console] --------------------------------------------------- -GET /_internal/desired_balance --------------------------------------------------- -// TEST[skip:Can't reliably test desired balance] - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "stats": { - "computation_active": false, - "computation_submitted": 5, - "computation_executed": 5, - "computation_converged": 5, - "computation_iterations": 4, - "computation_converged_index": 4, - "computation_time_in_millis": 0, - "reconciliation_time_in_millis": 0 - }, - "cluster_balance_stats" : { - "shard_count": 37, - "undesired_shard_allocation_count": 0, - "tiers": { - "data_hot" : { - "shard_count" : { - "total" : 7.0, - "min" : 2.0, - "max" : 3.0, - "average" : 2.3333333333333335, - "std_dev" : 0.4714045207910317 - }, - "undesired_shard_allocation_count" : { - "total" : 0.0, - "min" : 0.0, - "max" : 0.0, - "average" : 0.0, - "std_dev" : 0.0 - }, - "forecast_write_load" : { - "total" : 21.0, - "min" : 6.0, - "max" : 8.5, - "average" : 7.0, - "std_dev" : 1.0801234497346435 - }, - "forecast_disk_usage" : { - "total" : 36.0, - "min" : 10.0, - "max" : 16.0, - "average" : 12.0, - "std_dev" : 2.8284271247461903 - }, - "actual_disk_usage" : { - "total" : 36.0, - "min" : 10.0, - "max" : 16.0, - "average" : 12.0, - "std_dev" : 2.8284271247461903 - } - }, - "data_warm" : { - "shard_count" : { - "total" : 3.0, - "min" : 1.0, - "max" : 1.0, - "average" : 1.0, - "std_dev" : 0.0 - }, - "undesired_shard_allocation_count" : { - "total" : 0.0, - "min" : 0.0, - "max" : 0.0, - "average" : 0.0, - "std_dev" : 0.0 - }, - "forecast_write_load" : { - "total" : 0.0, - "min" : 0.0, - "max" : 0.0, - "average" : 0.0, - "std_dev" : 0.0 - }, - "forecast_disk_usage" : { - "total" : 42.0, - "min" : 12.0, - "max" : 18.0, - "average" : 14.0, - "std_dev" : 2.8284271247461903 - }, - "actual_disk_usage" : { - "total" : 42.0, - "min" : 12.0, - "max" : 18.0, - "average" : 14.0, - "std_dev" : 2.8284271247461903 - } - } - }, - "nodes": { - "node-1": { - "node_id": "UPYt8VwWTt-IADAEbqpLxA", - "roles": ["data_content"], - "shard_count": 10, - "undesired_shard_allocation_count": 0, - "forecast_write_load": 8.5, - "forecast_disk_usage_bytes": 498435, - "actual_disk_usage_bytes": 498435 - }, - "node-2": { - "node_id": "bgC66tboTIeFQ0VgRGI4Gg", - "roles": ["data_content"], - "shard_count": 15, - "undesired_shard_allocation_count": 0, - "forecast_write_load": 3.25, - "forecast_disk_usage_bytes": 384935, - "actual_disk_usage_bytes": 384935 - }, - "node-3": { - "node_id": "2x1VTuSOQdeguXPdN73yRw", - "roles": ["data_content"], - "shard_count": 12, - "undesired_shard_allocation_count": 0, - "forecast_write_load": 6.0, - "forecast_disk_usage_bytes": 648766, - "actual_disk_usage_bytes": 648766 - } - } - }, - "routing_table": { - "test": { - "0": { - "current": [ - { - "state": "STARTED", - "primary": true, - "node": "UPYt8VwWTt-IADAEbqpLxA", - "node_is_desired": true, - "relocating_node": null, - "relocating_node_is_desired": null, - "shard_id": 0, - "index": "test", - "forecast_write_load": 8.0, - "forecast_shard_size_in_bytes": 1024, - 
"tier_preference": ["data_content"] - } - ], - "desired": { - "node_ids": [ - "UPYt8VwWTt-IADAEbqpLxA" - ], - "total": 1, - "unassigned": 0, - "ignored": 0 - } - }, - "1": { - "current": [ - { - "state": "STARTED", - "primary": true, - "node": "2x1VTuSOQdeguXPdN73yRw", - "node_is_desired": true, - "relocating_node": null, - "relocating_node_is_desired": false, - "shard_id": 1, - "index": "test", - "forecast_write_load": null, - "forecast_shard_size_in_bytes": null, - "tier_preference": ["data_content"] - } - ], - "desired": { - "node_ids": [ - "2x1VTuSOQdeguXPdN73yRw" - ], - "total": 1, - "unassigned": 0, - "ignored": 0 - } - } - } - }, - "cluster_info" : { - "nodes" : { - "UPYt8VwWTt-IADAEbqpLxA" : { - "node_name" : "node-1", - "least_available" : { - "path" : "/data", - "total_bytes" : 1440713945, - "used_bytes" : 1222486407, - "free_bytes" : 218227538, - "free_disk_percent" : 15.1, - "used_disk_percent" : 84.9 - }, - "most_available" : { - "path" : "/data", - "total_bytes" : 1440713945, - "used_bytes" : 1222486407, - "free_bytes" : 218227538, - "free_disk_percent" : 15.1, - "used_disk_percent" : 84.9 - } - } - }, - "shard_sizes" : { - "[test][0][p]_bytes" : 1720826288, - "[test][1][p]_bytes" : 1720826288 - }, - "shard_data_set_sizes" : { - "[test][0][p]_bytes" : 1720826288, - "[test][1][p]_bytes" : 1720826288 - }, - "shard_paths" : { - "NodeAndShard[nodeId=UPYt8VwWTt-IADAEbqpLxA, shardId=[test][0]]" : "/data", - "NodeAndShard[nodeId=bgC66tboTIeFQ0VgRGI4Gg, shardId=[test][0]]" : "/data" - }, - "reserved_sizes" : [] - } -} --------------------------------------------------- -// TEST[skip:Can't reliably test desired balance] diff --git a/docs/reference/cluster/get-desired-nodes.asciidoc b/docs/reference/cluster/get-desired-nodes.asciidoc deleted file mode 100644 index 56af6913e34ac..0000000000000 --- a/docs/reference/cluster/get-desired-nodes.asciidoc +++ /dev/null @@ -1,98 +0,0 @@ -[[get-desired-nodes]] -=== Get desired nodes API -++++ -Get desired nodes -++++ - -NOTE: {cloud-only} - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Get desired nodes. - -[[get-desired-nodes-request]] -==== {api-request-title} - -////////////////////////// -[source,console] --------------------------------------------------- -PUT /_internal/desired_nodes/my_history/1 -{ - "nodes" : [ - { - "settings" : { - "node.name" : "instance-000187", - "node.external_id": "instance-000187", - "node.roles" : ["data_hot", "master"], - "node.attr.data" : "hot", - "node.attr.logical_availability_zone" : "zone-0" - }, - "processors" : 8.0, - "memory" : "59gb", - "storage" : "2tb" - } - ] -} --------------------------------------------------- -// TESTSETUP - - -[source,console] --------------------------------------------------- -DELETE /_internal/desired_nodes --------------------------------------------------- -// TEST -// TEARDOWN - -////////////////////////// - -[source,console] --------------------------------------------------- -GET /_internal/desired_nodes/_latest --------------------------------------------------- -// TEST - -[[get-desired-nodes-desc]] -==== {api-description-title} - -This API gets the latest desired nodes. - -[[get-desired-nodes-examples]] -==== {api-examples-title} - -This example gets the latest desired nodes. 
- -[source,console] --------------------------------------------------- -GET /_internal/desired_nodes/_latest --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "history_id": , - "version": , - "nodes": [ - { - "settings": , - "processors": , - "memory": "", - "storage": "" - } - ] -} --------------------------------------------------- -// TEST[s//$body.history_id/] -// TEST[s//$body.version/] -// TEST[s//$body.nodes.0.settings/] -// TEST[s//$body.nodes.0.processors/] -// TEST[s//$body.nodes.0.memory/] -// TEST[s//$body.nodes.0.storage/] -// TEST[s//$body.nodes.0.node_version/] diff --git a/docs/reference/cluster/get-settings.asciidoc b/docs/reference/cluster/get-settings.asciidoc deleted file mode 100644 index 5c0fe7a2026c7..0000000000000 --- a/docs/reference/cluster/get-settings.asciidoc +++ /dev/null @@ -1,49 +0,0 @@ -[[cluster-get-settings]] -=== Cluster get settings API -++++ -Cluster get settings -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Returns cluster-wide settings. - -[source,console] ----- -GET /_cluster/settings ----- - -[[cluster-get-settings-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cluster-get-settings-api-request]] -==== {api-request-title} - -`GET /_cluster/settings` - -[[cluster-get-settings-api-desc]] -==== {api-description-title} - -By default, this API call only returns settings that have been explicitly -defined, but can also include the default settings by calling the -`include_defaults` parameter. - - -[[cluster-get-settings-api-query-params]] -==== {api-query-parms-title} - - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=flat-settings] - -`include_defaults`:: - (Optional, Boolean) If `true`, returns default cluster settings from the local node. - Defaults to `false`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] diff --git a/docs/reference/cluster/health.asciidoc b/docs/reference/cluster/health.asciidoc deleted file mode 100644 index 374dd5d4a6f82..0000000000000 --- a/docs/reference/cluster/health.asciidoc +++ /dev/null @@ -1,205 +0,0 @@ -[[cluster-health]] -=== Cluster health API -++++ -Cluster health -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Returns the health status of a cluster. - -[[cluster-health-api-request]] -==== {api-request-title} - -`GET /_cluster/health/` - -[[cluster-health-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cluster-health-api-desc]] -==== {api-description-title} - -The cluster health API returns a simple status on the health of the -cluster. You can also use the API to get the health status of only specified -data streams and indices. For data streams, the API retrieves the health status -of the stream's backing indices. - -The cluster health status is: `green`, `yellow` or `red`. On the shard level, a -`red` status indicates that the specific shard is not allocated in the cluster, -`yellow` means that the primary shard is allocated but replicas are not, and -`green` means that all shards are allocated. 
The index level status is
-controlled by the worst shard status. The cluster status is controlled by the
-worst index status.
-
-One of the main benefits of the API is the ability to wait until the cluster
-reaches a certain high water-mark health level. For example, the following will
-wait for 50 seconds for the cluster to reach the `yellow` level (if it reaches
-the `green` or `yellow` status before 50 seconds elapse, it will return at that
-point):
-
-[source,console]
---------------------------------------------------
-GET /_cluster/health?wait_for_status=yellow&timeout=50s
---------------------------------------------------
-
-[[cluster-health-api-path-params]]
-==== {api-path-parms-title}
-
-``::
-(Optional, string)
-Comma-separated list of data streams, indices, and index aliases used to limit
-the request. Wildcard expressions (`*`) are supported.
-+
-To target all data streams and indices in a cluster, omit this parameter or use
-`_all` or `*`.
-
-[[cluster-health-api-query-params]]
-==== {api-query-parms-title}
-
-`level`::
-  (Optional, string) Can be one of `cluster`, `indices` or `shards`. Controls
-  the details level of the health information returned. Defaults to `cluster`.
-
-include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=local]
-
-include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms]
-
-`wait_for_active_shards`::
-  (Optional, string) A number controlling how many active shards to wait
-  for, `all` to wait for all shards in the cluster to be active, or `0` to not
-  wait. Defaults to `0`.
-
-`wait_for_events`::
-  (Optional, string) Can be one of `immediate`, `urgent`, `high`, `normal`,
-  `low`, `languid`. Wait until all currently queued events with the given
-  priority are processed.
-
-`wait_for_no_initializing_shards`::
-  (Optional, Boolean) A boolean value which controls whether to wait (until
-  the timeout provided) for the cluster to have no shard initializations.
-  Defaults to false, which means it will not wait for initializing shards.
-
-`wait_for_no_relocating_shards`::
-  (Optional, Boolean) A boolean value which controls whether to wait (until
-  the timeout provided) for the cluster to have no shard relocations. Defaults
-  to false, which means it will not wait for relocating shards.
-
-`wait_for_nodes`::
-  (Optional, string) The request waits until the specified number `N` of
-  nodes is available. It also accepts `>=N`, `<=N`, `>N` and `<N`.
-
-`wait_for_status`::
-  (Optional, string) One of `green`, `yellow` or `red`. Will wait (until
-  the timeout provided) until the status of the cluster changes to the one
-  provided or better, i.e. `green` > `yellow` > `red`. By default, will not
-  wait for any status.
-
-[[cluster-health-api-response-body]]
-==== {api-response-body-title}
-
-`cluster_name`::
-  (string) The name of the cluster.
-
-`status`::
-include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cluster-health-status]
-
-`timed_out`::
-  (Boolean) If `false`, the response returned within the period of
-  time that is specified by the `timeout` parameter (`30s` by default).
-
-`number_of_nodes`::
-  (integer) The number of nodes within the cluster.
-
-`number_of_data_nodes`::
-  (integer) The number of nodes that are dedicated data nodes.
-
-`active_primary_shards`::
-  (integer) The number of active primary shards.
-
-`active_shards`::
-  (integer) The total number of active primary and replica shards.
-
-`relocating_shards`::
-  (integer) The number of shards that are under relocation.
-
-`initializing_shards`::
-  (integer) The number of shards that are under initialization.
-
-`unassigned_shards`::
-  (integer) The number of shards that are not allocated.
- -`unassigned_primary_shards`:: - (integer) The number of shards that are primary but not allocated. **Note**: This number may be lower than the true value if your cluster contains nodes running a version below 8.16. For a more accurate count in this scenario, please use the <>. - -`delayed_unassigned_shards`:: - (integer) The number of shards whose allocation has been delayed by the - timeout settings. - -`number_of_pending_tasks`:: - (integer) The number of cluster-level changes that have not yet been - executed. - -`number_of_in_flight_fetch`:: - (integer) The number of unfinished fetches. - -`task_max_waiting_in_queue_millis`:: - (integer) The time expressed in milliseconds since the earliest initiated task - is waiting for being performed. - -`active_shards_percent_as_number`:: - (float) The ratio of active shards in the cluster expressed as a percentage. - -[[cluster-health-api-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET _cluster/health --------------------------------------------------- -// TEST[s/^/PUT test1\n/] - -The API returns the following response in case of a quiet single node cluster -with a single index with one shard and one replica: - -[source,console-result] --------------------------------------------------- -{ - "cluster_name" : "testcluster", - "status" : "yellow", - "timed_out" : false, - "number_of_nodes" : 1, - "number_of_data_nodes" : 1, - "active_primary_shards" : 1, - "active_shards" : 1, - "relocating_shards" : 0, - "initializing_shards" : 0, - "unassigned_shards" : 1, - "unassigned_primary_shards" : 0, - "delayed_unassigned_shards": 0, - "number_of_pending_tasks" : 0, - "number_of_in_flight_fetch": 0, - "task_max_waiting_in_queue_millis": 0, - "active_shards_percent_as_number": 50.0 -} --------------------------------------------------- -// TESTRESPONSE[s/testcluster/yamlRestTest/] -// TESTRESPONSE[s/"number_of_pending_tasks" : 0,/"number_of_pending_tasks" : $body.number_of_pending_tasks,/] -// TESTRESPONSE[s/"task_max_waiting_in_queue_millis": 0/"task_max_waiting_in_queue_millis": $body.task_max_waiting_in_queue_millis/] - -The following is an example of getting the cluster health at the -`shards` level: - -[source,console] --------------------------------------------------- -GET /_cluster/health/my-index-000001?level=shards --------------------------------------------------- -// TEST[setup:my_index] diff --git a/docs/reference/cluster/nodes-hot-threads.asciidoc b/docs/reference/cluster/nodes-hot-threads.asciidoc deleted file mode 100644 index f8b414453ae66..0000000000000 --- a/docs/reference/cluster/nodes-hot-threads.asciidoc +++ /dev/null @@ -1,81 +0,0 @@ -[[cluster-nodes-hot-threads]] -=== Nodes hot threads API -++++ -Nodes hot threads -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Returns the hot threads on each selected node in the cluster. - -[[cluster-nodes-hot-threads-api-request]] -==== {api-request-title} - -`GET /_nodes/hot_threads` + - -`GET /_nodes//hot_threads` - -[[cluster-nodes-hot-threads-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cluster-nodes-hot-threads-api-desc]] -==== {api-description-title} - -This API yields a breakdown of the hot threads on each selected node in the -cluster. The output is plain text with a breakdown of each node's top hot -threads. 
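-
-For example, a representative request might look like the following. The
-parameter values shown here are arbitrary; the query parameters themselves are
-documented below.
-
-[source,console]
---------------------------------------------------
-GET /_nodes/hot_threads?type=cpu&threads=5&ignore_idle_threads=false
---------------------------------------------------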
- - -[[cluster-nodes-hot-threads-api-path-params]] -==== {api-path-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=node-id] - - -[[cluster-nodes-hot-threads-api-query-params]] -==== {api-query-parms-title} - - -`ignore_idle_threads`:: - (Optional, Boolean) If true, known idle threads (e.g. waiting in a socket - select, or to get a task from an empty queue) are filtered out. Defaults to - true. - -`interval`:: - (Optional, <>) The interval to do the second - sampling of threads. Defaults to `500ms`. - -`snapshots`:: - (Optional, integer) Number of samples of thread stacktrace. Defaults to - `10`. - -`threads`:: - (Optional, integer) Specifies the number of hot threads to provide - information for. Defaults to `3`. If you are using this API for - troubleshooting, set this parameter to a large number (e.g. - `9999`) to get information about all the threads in the system. - -`timeout`:: - (Optional, <>) Specifies how long to wait for a - response from each node. If omitted, waits forever. - -`type`:: - (Optional, string) The type to sample. Available options are `block`, `cpu`, and - `wait`. Defaults to `cpu`. - - -[[cluster-nodes-hot-threads-api-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET /_nodes/hot_threads -GET /_nodes/nodeId1,nodeId2/hot_threads --------------------------------------------------- diff --git a/docs/reference/cluster/nodes-info.asciidoc b/docs/reference/cluster/nodes-info.asciidoc deleted file mode 100644 index 7ae6db7aa9a56..0000000000000 --- a/docs/reference/cluster/nodes-info.asciidoc +++ /dev/null @@ -1,399 +0,0 @@ -[[cluster-nodes-info]] -=== Nodes info API -++++ -Nodes info -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Returns cluster nodes information. - -[[cluster-nodes-info-api-request]] -==== {api-request-title} - -`GET /_nodes` + - -`GET /_nodes/` + - -`GET /_nodes/` + - -`GET /_nodes//` - -[[cluster-nodes-info-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - - -[[cluster-nodes-info-api-desc]] -==== {api-description-title} - -The cluster nodes info API allows to retrieve one or more (or all) of -the cluster nodes information. All the nodes selective options are explained -<>. - -By default, it returns all attributes and core settings for a node. - -[role="child_attributes"] -[[cluster-nodes-info-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) -Limits the information returned to the specific metrics. Supports a -comma-separated list, such as `http,ingest`. -+ -[%collapsible%open] -.Valid values for `` -==== -`aggregations`:: -Information about the available types of aggregation. - -`http`:: -Information about the HTTP interface of this node. - -`indices`:: -+ --- -Node-level configuration related to indexing: - -* `total_indexing_buffer`: the maximum size of the indexing buffer on this node. --- - -`ingest`:: -Information about ingest pipelines and processors. - -`jvm`:: -JVM information, including its name, its version, and its configuration. - -`os`:: -Operating system information, including its name and version. - -`plugins`:: -+ --- -Details about the installed plugins and modules per node. 
The following -information is available for each plugin and module: - -* `name`: plugin name -* `version`: version of Elasticsearch the plugin was built for -* `description`: short description of the plugin's purpose -* `classname`: fully-qualified class name of the plugin's entry point -* `has_native_controller`: whether or not the plugin has a native controller -process --- - -`process`:: -Process information, including the numeric process ID. - -`settings`:: -Lists all node settings in use as defined in the `elasticsearch.yml` file. - -`thread_pool`:: -Information about the configuration of each thread pool. - -`transport`:: -Information about the transport interface of the node. -==== - -If you use the full `GET /_nodes//` form of this API then you -can also request the metric `_all` to retrieve all metrics, or you can request -the metric `_none` to suppress all metrics and retrieve only the identity of -the node. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=node-id] - -[[cluster-nodes-info-api-response-body]] -==== {api-response-body-title} - -`build_hash`:: - Short hash of the last git commit in this release. - -`host`:: - The node's host name. - -`ip`:: - The node's IP address. - -`name`:: - The node's name. - -`total_indexing_buffer`:: - Total heap allowed to be used to hold recently indexed - documents before they must be written to disk. This size is - a shared pool across all shards on this node, and is - controlled by <>. - -`total_indexing_buffer_in_bytes`:: - Same as `total_indexing_buffer`, but expressed in bytes. - -`transport_address`:: - Host and port where transport HTTP connections are accepted. - -`version`:: - {es} version running on this node. - -`transport_version`:: - The most recent transport version that this node can communicate with. - -`index_version`:: - The most recent index version that this node can read. - -`component_versions`:: - The version numbers of individual components loaded in this node. - -The `os` flag can be set to retrieve information that concern the operating -system: - -`os.refresh_interval_in_millis`:: - Refresh interval for the OS statistics - -`os.name`:: - Name of the operating system (ex: Linux, Windows, Mac OS X) - -`os.arch`:: - Name of the JVM architecture (ex: amd64, x86) - -`os.version`:: - Version of the operating system - -`os.available_processors`:: - Number of processors available to the Java virtual machine - -`os.allocated_processors`:: - The number of processors actually used to calculate thread pool size. This - number can be set with the <> - setting of a node and defaults to the number of processors reported by - the OS. 
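-
-For example, to look at just these operating system fields you can request
-only the `os` metric and, if desired, trim the response with the standard
-`filter_path` option (used here purely as an illustration):
-
-[source,console]
---------------------------------------------------
-GET /_nodes/os?filter_path=nodes.*.os
---------------------------------------------------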
- -The `process` flag can be set to retrieve information that concern the current -running process: - -`process.refresh_interval_in_millis`:: - Refresh interval for the process statistics - -`process.id`:: - Process identifier (PID) - -`process.mlockall`:: - Indicates if the process address space has been successfully locked in memory - - -[[cluster-nodes-info-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=flat-settings] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeout-nodes-request] - - -[[cluster-nodes-info-api-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -# return just process -GET /_nodes/process - -# same as above -GET /_nodes/_all/process - -# return just jvm and process of only nodeId1 and nodeId2 -GET /_nodes/nodeId1,nodeId2/jvm,process - -# same as above -GET /_nodes/nodeId1,nodeId2/info/jvm,process - -# return all the information of only nodeId1 and nodeId2 -GET /_nodes/nodeId1,nodeId2/_all --------------------------------------------------- - -The `_all` flag can be set to return all the information - or you can omit it. - - -[[cluster-nodes-info-api-example-plugins]] -===== Example for plugins metric - -If `plugins` is specified, the result will contain details about the installed -plugins and modules: - -[source,console] --------------------------------------------------- -GET /_nodes/plugins --------------------------------------------------- -// TEST[setup:node] - -The API returns the following response: - -[source,console-result] --------------------------------------------------- -{ - "_nodes": ... - "cluster_name": "elasticsearch", - "nodes": { - "USpTGYaBSIKbgSUJR2Z9lg": { - "name": "node-0", - "transport_address": "192.168.17:9300", - "host": "node-0.elastic.co", - "ip": "192.168.17", - "version": "{version}", - "transport_version": 100000298, - "index_version": 100000074, - "component_versions": { - "ml_config_version": 100000162, - "transform_config_version": 100000096 - }, - "build_flavor": "default", - "build_type": "{build_type}", - "build_hash": "587409e", - "roles": [ - "master", - "data", - "ingest" - ], - "attributes": {}, - "plugins": [ - { - "name": "analysis-icu", - "version": "{version}", - "description": "The ICU Analysis plugin integrates Lucene ICU module into elasticsearch, adding ICU relates analysis components.", - "classname": "org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin", - "has_native_controller": false - } - ], - "modules": [ - { - "name": "lang-painless", - "version": "{version}", - "description": "An easy, safe and fast scripting language for Elasticsearch", - "classname": "org.elasticsearch.painless.PainlessPlugin", - "has_native_controller": false - } - ] - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"_nodes": \.\.\./"_nodes": $body.$_path,/] -// TESTRESPONSE[s/"elasticsearch"/$body.cluster_name/] -// TESTRESPONSE[s/"USpTGYaBSIKbgSUJR2Z9lg"/\$node_name/] -// TESTRESPONSE[s/"name": "node-0"/"name": $body.$_path/] -// TESTRESPONSE[s/"transport_address": "192.168.17:9300"/"transport_address": $body.$_path/] -// TESTRESPONSE[s/"host": "node-0.elastic.co"/"host": $body.$_path/] -// TESTRESPONSE[s/"ip": "192.168.17"/"ip": $body.$_path/] -// TESTRESPONSE[s/"transport_version": 100000298/"transport_version": $body.$_path/] -// TESTRESPONSE[s/"index_version": 100000074/"index_version": $body.$_path/] -// TESTRESPONSE[s/"component_versions": 
\{[^\}]*\}/"component_versions": $body.$_path/] -// TESTRESPONSE[s/"build_hash": "587409e"/"build_hash": $body.$_path/] -// TESTRESPONSE[s/"roles": \[[^\]]*\]/"roles": $body.$_path/] -// TESTRESPONSE[s/"attributes": \{[^\}]*\}/"attributes": $body.$_path/] -// TESTRESPONSE[s/"plugins": \[[^\]]*\]/"plugins": $body.$_path/] -// TESTRESPONSE[s/"modules": \[[^\]]*\]/"modules": $body.$_path/] - - -[[cluster-nodes-info-api-example-ingest]] -===== Example for ingest metric - -If `ingest` is specified, the response contains details about the available -processors per node: - -[source,console] --------------------------------------------------- -GET /_nodes/ingest --------------------------------------------------- -// TEST[setup:node] - -The API returns the following response: - -[source,console-result] --------------------------------------------------- -{ - "_nodes": ... - "cluster_name": "elasticsearch", - "nodes": { - "USpTGYaBSIKbgSUJR2Z9lg": { - "name": "node-0", - "transport_address": "192.168.17:9300", - "host": "node-0.elastic.co", - "ip": "192.168.17", - "version": "{version}", - "transport_version": 100000298, - "index_version": 100000074, - "component_versions": { - "ml_config_version": 100000162, - "transform_config_version": 100000096 - }, - "build_flavor": "default", - "build_type": "{build_type}", - "build_hash": "587409e", - "roles": [], - "attributes": {}, - "ingest": { - "processors": [ - { - "type": "date" - }, - { - "type": "uppercase" - }, - { - "type": "set" - }, - { - "type": "lowercase" - }, - { - "type": "gsub" - }, - { - "type": "convert" - }, - { - "type": "remove" - }, - { - "type": "fail" - }, - { - "type": "foreach" - }, - { - "type": "split" - }, - { - "type": "trim" - }, - { - "type": "rename" - }, - { - "type": "join" - }, - { - "type": "append" - } - ] - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"_nodes": \.\.\./"_nodes": $body.$_path,/] -// TESTRESPONSE[s/"elasticsearch"/$body.cluster_name/] -// TESTRESPONSE[s/"USpTGYaBSIKbgSUJR2Z9lg"/\$node_name/] -// TESTRESPONSE[s/"name": "node-0"/"name": $body.$_path/] -// TESTRESPONSE[s/"transport_address": "192.168.17:9300"/"transport_address": $body.$_path/] -// TESTRESPONSE[s/"host": "node-0.elastic.co"/"host": $body.$_path/] -// TESTRESPONSE[s/"ip": "192.168.17"/"ip": $body.$_path/] -// TESTRESPONSE[s/"transport_version": 100000298/"transport_version": $body.$_path/] -// TESTRESPONSE[s/"index_version": 100000074/"index_version": $body.$_path/] -// TESTRESPONSE[s/"component_versions": \{[^\}]*\}/"component_versions": $body.$_path/] -// TESTRESPONSE[s/"build_hash": "587409e"/"build_hash": $body.$_path/] -// TESTRESPONSE[s/"roles": \[[^\]]*\]/"roles": $body.$_path/] -// TESTRESPONSE[s/"attributes": \{[^\}]*\}/"attributes": $body.$_path/] -// TESTRESPONSE[s/"processors": \[[^\]]*\]/"processors": $body.$_path/] diff --git a/docs/reference/cluster/nodes-reload-secure-settings.asciidoc b/docs/reference/cluster/nodes-reload-secure-settings.asciidoc deleted file mode 100644 index 842ca30c335f9..0000000000000 --- a/docs/reference/cluster/nodes-reload-secure-settings.asciidoc +++ /dev/null @@ -1,104 +0,0 @@ -[[cluster-nodes-reload-secure-settings]] -=== Nodes reload secure settings API -++++ -Nodes reload secure settings -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Reloads the keystore on nodes in the cluster. 
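-
-As a sketch of the overall workflow (the setting name below is only an
-illustration; substitute whichever reloadable secure setting you actually need
-to change), you would first update the keystore on each node:
-
-[source,sh]
---------------------------------------------------
-# run on every node to add or update the reloadable secure setting
-bin/elasticsearch-keystore add xpack.notification.slack.account.monitoring.secure_url
---------------------------------------------------
-
-and then call `POST _nodes/reload_secure_settings`, as shown in the examples
-below, so that the running nodes pick up the new value.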
- -[[cluster-nodes-reload-secure-settings-api-request]] -==== {api-request-title} - -`POST /_nodes/reload_secure_settings` + -`POST /_nodes//reload_secure_settings` - -[[cluster-nodes-reload-secure-settings-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `manage` -<> to use this API. - -[[cluster-nodes-reload-secure-settings-api-desc]] -==== {api-description-title} - -<> are stored in an on-disk keystore. Certain -of these settings are <>. That is, you -can change them on disk and reload them without restarting any nodes in the -cluster. When you have updated reloadable secure settings in your keystore, you -can use this API to reload those settings on each node. - -When the {es} keystore is password protected and not simply obfuscated, you must -provide the password for the keystore when you reload the secure settings. -Reloading the settings for the whole cluster assumes that all nodes' keystores -are protected with the same password; this method is allowed only when -<>. Alternatively, you can -reload the secure settings on each node by locally accessing the API and passing -the node-specific {es} keystore password. - -[[cluster-nodes-reload-secure-settings-path-params]] -==== {api-path-parms-title} - -``:: - (Optional, string) The names of particular nodes in the cluster to target. - For example, `nodeId1,nodeId2`. For node selection options, see - <>. - -NOTE: {es} requires consistent secure settings across the cluster nodes, but -this consistency is not enforced. Hence, reloading specific nodes is not -standard. It is justifiable only when retrying failed reload operations. - -[[cluster-nodes-reload-secure-settings-api-request-body]] -==== {api-request-body-title} - -`secure_settings_password`:: - (Optional, string) The password for the {es} keystore. - -[[cluster-nodes-reload-secure-settings-api-example]] -==== {api-examples-title} - -The following examples assume a common password for the {es} keystore on every -node of the cluster: - -[source,console] --------------------------------------------------- -POST _nodes/reload_secure_settings -{ - "secure_settings_password":"keystore-password" -} -POST _nodes/nodeId1,nodeId2/reload_secure_settings -{ - "secure_settings_password":"keystore-password" -} --------------------------------------------------- -// TEST[setup:node] -// TEST[s/nodeId1,nodeId2/*/] - -The response contains the `nodes` object, which is a map, keyed by the -node id. Each value has the node `name` and an optional `reload_exception` -field. The `reload_exception` field is a serialization of the exception -that was thrown during the reload process, if any. - -[source,console-result] --------------------------------------------------- -{ - "_nodes": { - "total": 1, - "successful": 1, - "failed": 0 - }, - "cluster_name": "my_cluster", - "nodes": { - "pQHNt5rXTTWNvUgOrdynKg": { - "name": "node-0" - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"my_cluster"/$body.cluster_name/] -// TESTRESPONSE[s/"pQHNt5rXTTWNvUgOrdynKg"/\$node_name/] diff --git a/docs/reference/cluster/nodes-stats.asciidoc b/docs/reference/cluster/nodes-stats.asciidoc deleted file mode 100644 index 522983035079d..0000000000000 --- a/docs/reference/cluster/nodes-stats.asciidoc +++ /dev/null @@ -1,2605 +0,0 @@ -[[cluster-nodes-stats]] -=== Nodes stats API - -++++ -Nodes stats -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. 
--- - -Returns cluster nodes statistics. - -[[cluster-nodes-stats-api-request]] -==== {api-request-title} - -`GET /_nodes/stats` + - -`GET /_nodes//stats` + - -`GET /_nodes/stats/` + - -`GET /_nodes//stats/` + - -`GET /_nodes/stats//` + - -`GET /_nodes//stats//` - -[[cluster-nodes-stats-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cluster-nodes-stats-api-desc]] -==== {api-description-title} - -You can use the cluster nodes stats API to retrieve statistics for nodes in a cluster. - -All the nodes selective options are explained <>. - -By default, all stats are returned. -You can limit the returned information by using metrics. - -[[cluster-nodes-stats-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Limits the information returned to the specific metrics. -A comma-separated list of the following options: -+ --- -`adaptive_selection`:: -Statistics about <>. - -`allocations`:: -Statistics about allocated shards - -`breaker`:: -Statistics about the field data circuit breaker. - -`discovery`:: -Statistics about the discovery. - -`fs`:: -File system information, data path, free disk space, read/write stats. - -`http`:: -HTTP connection information. - -`indexing_pressure`:: -Statistics about the node's indexing load and related rejections. - -`indices`:: -Indices stats about size, document count, indexing and deletion times, search times, field cache size, merges and flushes. - -`ingest`:: -Statistics about ingest preprocessing. - -`jvm`:: -JVM stats, memory pool information, garbage collection, buffer pools, number of loaded/unloaded classes. - -`os`:: -Operating system stats, load average, mem, swap. - -`process`:: -Process statistics, memory consumption, cpu usage, open file descriptors. - -`repositories`:: -Statistics about snapshot repositories. - -`thread_pool`:: -Statistics about each thread pool, including current size, queue and rejected tasks. - -`transport`:: -Transport statistics about sent and received bytes in cluster communication. --- - -``:: -(Optional, string) Limit the information returned for `indices` metric to the specific index metrics. -It can be used only if `indices` (or `all`) metric is specified. -Supported metrics are: -+ --- -* `bulk` -* `completion` -* `docs` -* `fielddata` -* `flush` -* `get` -* `indexing` -* `mappings` -* `merge` -* `query_cache` -* `recovery` -* `refresh` -* `request_cache` -* `search` -* `segments` -* `shard_stats` -* `store` -* `translog` -* `warmer` -* `dense_vector` -* `sparse_vector` --- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=node-id] - -[[cluster-nodes-stats-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=completion-fields] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=fielddata-fields] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=fields] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=groups] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=level] - -`types`:: -(Optional, string) A comma-separated list of document types for the -`indexing` index metric. 
- -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeout-nodes-request] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=include-segment-file-sizes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=include-unloaded-segments] - -[role="child_attributes"] -[[cluster-nodes-stats-api-response-body]] -==== {api-response-body-title} - -`_nodes`:: -(object) Contains statistics about the number of nodes selected by the request. -+ -.Properties of `_nodes` -[%collapsible%open] -==== -`total`:: -(integer) Total number of nodes selected by the request. - -`successful`:: -(integer) Number of nodes that responded successfully to the request. - -`failed`:: -(integer) Number of nodes that rejected the request or failed to respond. -If this value is not `0`, a reason for the rejection or failure is included in the response. - -==== - -`cluster_name`:: -(string) Name of the cluster. -Based on the <> setting. - -`nodes`:: -(object) Contains statistics for the nodes selected by the request. -+ -.Properties of `nodes` -[%collapsible%open] -==== - -``:: -(object) Contains statistics for the node. -+ -.Properties of `` -[%collapsible%open] -===== -`timestamp`:: -(integer) Time the node stats were collected for this response. -Recorded in milliseconds since the {wikipedia}/Unix_time[Unix Epoch]. - -`name`:: -(string) Human-readable identifier for the node. -Based on the <> setting. - -`transport_address`:: -(string) Host and port for the <>, used for internal communication between nodes in a cluster. - -`host`:: -(string) Network host for the node, based on the <> setting. - -`ip`:: -(string) IP address and port for the node. - -`roles`:: -(array of strings) Roles assigned to the node. -See <>. - -`attributes`:: -(object) Contains a list of attributes for the node. - -[[cluster-nodes-stats-api-response-body-indices]] -`indices`:: -(object) Contains statistics about indices with shards assigned to the node. -+ -.Properties of `indices` -[%collapsible%open] -====== -`docs`:: -(object) Contains statistics about documents across all primary shards assigned to the node. -+ -.Properties of `docs` -[%collapsible%open] -======= -`count`:: -(integer) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=docs-count] - -`deleted`:: -(integer) -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=docs-deleted] - -======= - -`store`:: -(object) Contains statistics about the size of shards assigned to the node. -+ -.Properties of `store` -[%collapsible%open] -======= - -`size`:: -(<>) Total size of all shards assigned to the node. - -`size_in_bytes`:: -(integer) Total size, in bytes, of all shards assigned to the node. - -`total_data_set_size`:: -(<>) Total data set size of all shards assigned to the node. -This includes the size of shards not stored fully on the node, such as the cache for <>. - -`total_data_set_size_in_bytes`:: -(integer) Total data set size, in bytes, of all shards assigned to the node. -This includes the size of shards not stored fully on the node, such as the cache for <>. - -`reserved`:: -(<>) A prediction of how much larger the shard stores on this node will eventually grow due to ongoing peer recoveries, restoring snapshots, and similar activities. -A value of `-1b` indicates that this is not available. - -`reserved_in_bytes`:: -(integer) A prediction, in bytes, of how much larger the shard stores on this node will eventually grow due to ongoing peer recoveries, restoring snapshots, and similar activities. -A value of `-1` indicates that this is not available. 
-
-=======
-
-`indexing`::
-(object) Contains statistics about indexing operations for the node.
-+
-.Properties of `indexing`
-[%collapsible%open]
-=======
-
-`index_total`::
-(integer) Total number of indexing operations.
-
-`index_time`::
-(<>) Total time spent performing indexing operations.
-
-`index_time_in_millis`::
-(integer) Total time in milliseconds spent performing indexing operations.
-
-`index_current`::
-(integer) Number of indexing operations currently running.
-
-`index_failed`::
-(integer) Number of failed indexing operations.
-
-`delete_total`::
-(integer) Total number of deletion operations.
-
-`delete_time`::
-(<>) Time spent performing deletion operations.
-
-`delete_time_in_millis`::
-(integer) Time in milliseconds spent performing deletion operations.
-
-`delete_current`::
-(integer) Number of deletion operations currently running.
-
-`noop_update_total`::
-(integer) Total number of noop operations.
-
-`is_throttled`::
-(Boolean) Whether indexing operations are currently being throttled.
-
-`throttle_time`::
-(<>) Total time spent throttling operations.
-
-`throttle_time_in_millis`::
-(integer) Total time in milliseconds spent throttling operations.
-
-`write_load`::
-(double) Average number of write threads used while indexing documents.
-
-=======
-
-`get`::
-(object) Contains statistics about get operations for the node.
-+
-.Properties of `get`
-[%collapsible%open]
-=======
-
-`total`::
-(integer) Total number of get operations.
-
-`getTime`::
-(<>) Time spent performing get operations.
-
-`time_in_millis`::
-(integer) Time in milliseconds spent performing get operations.
-
-`exists_total`::
-(integer) Total number of successful get operations.
-
-`exists_time`::
-(<>) Time spent performing successful get operations.
-
-`exists_time_in_millis`::
-(integer) Time in milliseconds spent performing successful get operations.
-
-`missing_total`::
-(integer) Total number of failed get operations.
-
-`missing_time`::
-(<>) Time spent performing failed get operations.
-
-`missing_time_in_millis`::
-(integer) Time in milliseconds spent performing failed get operations.
-
-`current`::
-(integer) Number of get operations currently running.
-
-=======
-
-`search`::
-(object) Contains statistics about search operations for the node.
-+
-.Properties of `search`
-[%collapsible%open]
-=======
-
-`open_contexts`::
-(integer) Number of open search contexts.
-
-`query_total`::
-(integer) Total number of query operations.
-
-`query_time`::
-(<>) Time spent performing query operations.
-
-`query_time_in_millis`::
-(integer) Time in milliseconds spent performing query operations.
-
-`query_current`::
-(integer) Number of query operations currently running.
-
-`fetch_total`::
-(integer) Total number of fetch operations.
-
-`fetch_time`::
-(<>) Time spent performing fetch operations.
-
-`fetch_time_in_millis`::
-(integer) Time in milliseconds spent performing fetch operations.
-
-`fetch_current`::
-(integer) Number of fetch operations currently running.
-
-`scroll_total`::
-(integer) Total number of scroll operations.
-
-`scroll_time`::
-(<>) Time spent performing scroll operations.
-
-`scroll_time_in_millis`::
-(integer) Time in milliseconds spent performing scroll operations.
-
-`scroll_current`::
-(integer) Number of scroll operations currently running.
-
-`suggest_total`::
-(integer) Total number of suggest operations.
-
-`suggest_time`::
-(<>) Time spent performing suggest operations.
-
-`suggest_time_in_millis`::
-(integer) Time in milliseconds spent performing suggest operations.
- -`suggest_current`:: -(integer) Number of suggest operations currently running. - -======= - -`merges`:: -(object) Contains statistics about merge operations for the node. -+ -.Properties of `merges` -[%collapsible%open] -======= - -`current`:: -(integer) Number of merge operations currently running. - -`current_docs`:: -(integer) Number of document merges currently running. - -`current_size`:: -(<>) Memory used performing current document merges. - -`current_size_in_bytes`:: -(integer) Memory, in bytes, used performing current document merges. - -`total`:: -(integer) Total number of merge operations. - -`total_time`:: -(<>) Total time spent performing merge operations. - -`total_time_in_millis`:: -(integer) Total time in milliseconds spent performing merge operations. - -`total_docs`:: -(integer) Total number of merged documents. - -`total_size`:: -(<>) Total size of document merges. - -`total_size_in_bytes`:: -(integer) Total size of document merges in bytes. - -`total_stopped_time`:: -(<>) Total time spent stopping merge operations. - -`total_stopped_time_in_millis`:: -(integer) Total time in milliseconds spent stopping merge operations. - -`total_throttled_time`:: -(<>) Total time spent throttling merge operations. - -`total_throttled_time_in_millis`:: -(integer) Total time in milliseconds spent throttling merge operations. - -`total_auto_throttle`:: -(<>) Size of automatically throttled merge operations. - -`total_auto_throttle_in_bytes`:: -(integer) Size, in bytes, of automatically throttled merge operations. - -======= - -`refresh`:: -(object) Contains statistics about refresh operations for the node. -+ -.Properties of `refresh` -[%collapsible%open] -======= - -`total`:: -(integer) Total number of refresh operations. - -`total_time`:: -(<>) Total time spent performing refresh operations. - -`total_time_in_millis`:: -(integer) Total time in milliseconds spent performing refresh operations. - -`external_total`:: -(integer) Total number of external refresh operations. - -`external_total_time`:: -(<>) Total time spent performing external operations. - -`external_total_time_in_millis`:: -(integer) Total time in milliseconds spent performing external operations. - -`listeners`:: -(integer) Number of refresh listeners. - -======= - -`flush`:: -(object) Contains statistics about flush operations for the node. -+ -.Properties of `flush` -[%collapsible%open] -======= - -`total`:: -(integer) Number of flush operations. - -`periodic`:: -(integer) Number of flush periodic operations. - -`total_time`:: -(<>) Total time spent performing flush operations. - -`total_time_in_millis`:: -(integer) Total time in milliseconds spent performing flush operations. - -======= - -`warmer`:: -(object) Contains statistics about index warming operations for the node. -+ -.Properties of `warmer` -[%collapsible%open] -======= - -`current`:: -(integer) Number of active index warmers. - -`total`:: -(integer) Total number of index warmers. - -`total_time`:: -(<>) Total time spent performing index warming operations. - -`total_time_in_millis`:: -(integer) Total time in milliseconds spent performing index warming operations. - -======= - -`query_cache`:: -(object) Contains statistics about the query cache across all shards assigned to the node. -+ -.Properties of `query_cache` -[%collapsible%open] -======= - -`memory_size`:: -(<>) Total amount of memory used for the query cache across all shards assigned to the node. 
- -`memory_size_in_bytes`:: -(integer) Total amount of memory, in bytes, used for the query cache across all shards assigned to the node. - -`total_count`:: -(integer) Total count of hits, misses, and cached queries in the query cache. - -`hit_count`:: -(integer) Number of query cache hits. - -`miss_count`:: -(integer) Number of query cache misses. - -`cache_size`:: -(integer) Current number of cached queries. - -`cache_count`:: -(integer) Total number of all queries that have been cached. - -`evictions`:: -(integer) Number of query cache evictions. - -======= - -`fielddata`:: -(object) Contains statistics about the field data cache across all shards assigned to the node. -+ -.Properties of `fielddata` -[%collapsible%open] -======= - -`memory_size`:: -(<>) Total amount of memory used for the field data cache across all shards assigned to the node. - -`memory_size_in_bytes`:: -(integer) Total amount of memory, in bytes, used for the field data cache across all shards assigned to the node. - -`evictions`:: -(integer) Number of fielddata evictions. - -======= - -`completion`:: -(object) Contains statistics about completions across all shards assigned to the node. -+ -.Properties of `completion` -[%collapsible%open] -======= - -`size`:: -(<>) Total amount of memory used for completion across all shards assigned to the node. - -`size_in_bytes`:: -(integer) Total amount of memory, in bytes, used for completion across all shards assigned to the node. - -======= - -`segments`:: -(object) Contains statistics about segments across all shards assigned to the node. -+ -.Properties of `segments` -[%collapsible%open] -======= - -`count`:: -(integer) Number of segments. - -`memory`:: -(<>) Total amount of memory used for segments across all shards assigned to the node. - -`memory_in_bytes`:: -(integer) Total amount of memory, in bytes, used for segments across all shards assigned to the node. - -`terms_memory`:: -(<>) Total amount of memory used for terms across all shards assigned to the node. - -`terms_memory_in_bytes`:: -(integer) Total amount of memory, in bytes, used for terms across all shards assigned to the node. - -`stored_fields_memory`:: -(<>) Total amount of memory used for stored fields across all shards assigned to the node. - -`stored_fields_memory_in_bytes`:: -(integer) Total amount of memory, in bytes, used for stored fields across all shards assigned to the node. - -`term_vectors_memory`:: -(<>) Total amount of memory used for term vectors across all shards assigned to the node. - -`term_vectors_memory_in_bytes`:: -(integer) Total amount of memory, in bytes, used for term vectors across all shards assigned to the node. - -`norms_memory`:: -(<>) Total amount of memory used for normalization factors across all shards assigned to the node. - -`norms_memory_in_bytes`:: -(integer) Total amount of memory, in bytes, used for normalization factors across all shards assigned to the node. - -`points_memory`:: -(<>) Total amount of memory used for points across all shards assigned to the node. - -`points_memory_in_bytes`:: -(integer) Total amount of memory, in bytes, used for points across all shards assigned to the node. - -`doc_values_memory`:: -(<>) Total amount of memory used for doc values across all shards assigned to the node. - -`doc_values_memory_in_bytes`:: -(integer) Total amount of memory, in bytes, used for doc values across all shards assigned to the node. - -`index_writer_memory`:: -(<>) Total amount of memory used by all index writers across all shards assigned to the node. 
-
-`index_writer_memory_in_bytes`::
-(integer) Total amount of memory, in bytes, used by all index writers across all shards assigned to the node.
-
-`version_map_memory`::
-(<>) Total amount of memory used by all version maps across all shards assigned to the node.
-
-`version_map_memory_in_bytes`::
-(integer) Total amount of memory, in bytes, used by all version maps across all shards assigned to the node.
-
-`fixed_bit_set`::
-(<>) Total amount of memory used by fixed bit sets across all shards assigned to the node.
-+
-Fixed bit sets are used for nested object field types and type filters for <> fields.
-
-`fixed_bit_set_memory_in_bytes`::
-(integer) Total amount of memory, in bytes, used by fixed bit sets across all shards assigned to the node.
-+
-Fixed bit sets are used for nested object field types and type filters for <> fields.
-
-`max_unsafe_auto_id_timestamp`::
-(integer) Time of the most recently retried indexing request.
-Recorded in milliseconds since the {wikipedia}/Unix_time[Unix Epoch].
-
-`file_sizes`::
-(object) Contains statistics about the size of the segment file.
-+
-.Properties of `file_sizes`
-[%collapsible%open]
-========
-`size`::
-(<>) Size of the segment file.
-
-`size_in_bytes`::
-(integer) Size, in bytes, of the segment file.
-
-`description`::
-(string) Description of the segment file.
-
-========
-=======
-
-`translog`::
-(object) Contains statistics about transaction log operations for the node.
-+
-.Properties of `translog`
-[%collapsible%open]
-=======
-
-`operations`::
-(integer) Number of transaction log operations.
-
-`size`::
-(<>) Size of the transaction log.
-
-`size_in_bytes`::
-(integer) Size, in bytes, of the transaction log.
-
-`uncommitted_operations`::
-(integer) Number of uncommitted transaction log operations.
-
-`uncommitted_size`::
-(<>) Size of uncommitted transaction log operations.
-
-`uncommitted_size_in_bytes`::
-(integer) Size, in bytes, of uncommitted transaction log operations.
-
-`earliest_last_modified_age`::
-(integer) Earliest last modified age for the transaction log.
-
-=======
-
-`request_cache`::
-(object) Contains statistics about the request cache across all shards assigned to the node.
-+
-.Properties of `request_cache`
-[%collapsible%open]
-=======
-
-`memory_size`::
-(<>) Memory used by the request cache.
-
-`memory_size_in_bytes`::
-(integer) Memory, in bytes, used by the request cache.
-
-`evictions`::
-(integer) Number of request cache evictions.
-
-`hit_count`::
-(integer) Number of request cache hits.
-
-`miss_count`::
-(integer) Number of request cache misses.
-
-=======
-
-`recovery`::
-(object) Contains statistics about recovery operations for the node.
-+
-.Properties of `recovery`
-[%collapsible%open]
-=======
-
-`current_as_source`::
-(integer) Number of recoveries that used an index shard as a source.
-
-`current_as_target`::
-(integer) Number of recoveries that used an index shard as a target.
-
-`throttle_time`::
-(<>) Time by which recovery operations were delayed due to throttling.
-
-`throttle_time_in_millis`::
-(integer) Time in milliseconds recovery operations were delayed due to throttling.
-
-=======
-
-`shard_stats`::
-(object) Contains statistics about all shards assigned to the node.
-+
-.Properties of `shard_stats`
-[%collapsible%open]
-=======
-
-`total_count`::
-(integer) The total number of shards assigned to the node.
-
-=======
-
-`mappings`::
-(object) Contains statistics about the mappings for the node.
-This is not shown for the `shards` level, since mappings may be shared across the shards of an index on a node. -+ -.Properties of `mappings` -[%collapsible%open] -======= - -`total_count`:: -(integer) Number of mappings, including <> and <> fields. - -`total_estimated_overhead`:: -(<>) Estimated heap overhead of mappings on this node, which allows for 1kiB of heap for every mapped field. - -`total_estimated_overhead_in_bytes`:: -(integer) Estimated heap overhead, in bytes, of mappings on this node, which allows for 1kiB of heap for every mapped field. - -`total_segments`:: -(integer) Estimated number of Lucene segments on this node - -`total_segment_fields`:: -(integer) Estimated number of fields at the segment level on this node - -`average_fields_per_segment`:: -(integer) Estimated average number of fields per segment on this node -======= - -`dense_vector`:: -(object) Contains statistics about dense_vector across all shards assigned to the node. -+ -.Properties of `dense_vector` -[%collapsible%open] -======= - -`value_count`:: -(integer) Total number of dense vector indexed across all shards assigned to the node. - -======= - -`sparse_vector`:: -(object) Contains statistics about sparse_vector across all shards assigned to the node. -+ -.Properties of `sparse_vector` -[%collapsible%open] -======= - -`value_count`:: -(integer) Total number of sparse vector indexed across all shards assigned to the node. - -======= - -`shards`:: -(object) When the `shards` level is requested, contains the aforementioned `indices` statistics for every shard (per -index, and then per shard ID), as well as the following shard-specific statistics (which are not shown when the -requested level is higher than `shards`): -+ -.Additional shard-specific statistics for the `shards` level -[%collapsible%open] -======= - -`routing`:: -(object) Contains routing information about the shard. -+ -.Properties of `routing` -[%collapsible%open] -======== - -`state`:: -(string) State of the shard. Returned values are: -+ -* `INITIALIZING`: The shard is initializing/recovering. -* `RELOCATING`: The shard is relocating. -* `STARTED`: The shard has started. -* `UNASSIGNED`: The shard is not assigned to any node. - -`primary`:: -(Boolean) Whether the shard is a primary shard or not. - -`node`:: -(string) ID of the node the shard is allocated to. - -`relocating_node`:: -(string) ID of the node the shard is either relocating to or relocating from, or null if shard is not relocating. - -======== - -`commit`:: -(object) Contains information regarding the last commit point of the shard. -+ -.Properties of `commit` -[%collapsible%open] -======== - -`id`:: -(string) Base64 version of the commit ID. - -`generation`:: -(integer) Lucene generation of the commit. - -`user_data`:: -(object) Contains additional technical information about the commit. - -`num_docs`:: -(integer) The number of docs in the commit. - -======== - -`seq_no`:: -(object) Contains information about <> and checkpoints for the shard. -+ -.Properties of `seq_no` -[%collapsible%open] -======== - -`max_seq_no`:: -(integer) The maximum sequence number issued so far. - -`local_checkpoint`:: -(integer) The current local checkpoint of the shard. - -`global_checkpoint`:: -(integer) The current global checkpoint of the shard. - -======== - -`retention_leases`:: -(object) Contains information about <>. -+ -.Properties of `retention_leases` -[%collapsible%open] -======== - -`primary_term`:: -(integer) The primary term of this retention lease collection. 
- -`version`:: -(integer) The current version of the retention lease collection. - -`leases`:: -(array of objects) List of current leases for this shard. -+ -.Properties of `leases` -[%collapsible%open] -========= - -`id`:: -(string) The ID of the lease. - -`retaining_seq_no`:: -(integer) The minimum sequence number to be retained by the lease. - -`timestamp`:: -(integer) The timestamp of when the lease was created or renewed. -Recorded in milliseconds since the {wikipedia}/Unix_time[Unix Epoch]. - -`source`:: -(string) The source of the lease. - -========= -======== - -`shard_path`:: -(object) -+ -.Properties of `shard_path` -[%collapsible%open] -======== - -`state_path`:: -(string) The state-path root, without the index name and the shard ID. - -`data_path`:: -(string) The data-path root, without the index name and the shard ID. - -`is_custom_data_path`:: -(boolean) Whether the data path is a custom data location and therefore outside of the nodes configured data paths. - -======== - -`search_idle`:: -(boolean) Whether the shard is <> or not. - -`search_idle_time`:: -(integer) Time since previous searcher access. -Recorded in milliseconds. - -======= -====== - -[[cluster-nodes-stats-api-response-body-os]] -`os`:: -(object) Contains statistics about the operating system for the node. -+ -.Properties of `os` -[%collapsible%open] -====== - -`timestamp`:: -(integer) Last time the operating system statistics were refreshed. -Recorded in milliseconds since the {wikipedia}/Unix_time[Unix Epoch]. - -`cpu`:: -(object) Contains statistics about CPU usage for the node. -+ -.Properties of `cpu` -[%collapsible%open] -======= - -`percent`:: -(integer) Recent CPU usage for the whole system, or `-1` if not supported. - -`load_average`:: -(object) Contains statistics about load averages on the system. -+ -.Properties of `load_average` -[%collapsible%open] -======== - -`1m`:: -(float) One-minute load average on the system (field is not present if one-minute load average is not available). - -`5m`:: -(float) Five-minute load average on the system (field is not present if five-minute load average is not available). - -`15m`:: -(float) Fifteen-minute load average on the system (field is not present if fifteen-minute load average is not available). - -======== -======= - -`mem`:: -(object) Contains statistics about memory usage for the node. -+ -.Properties of `mem` -[%collapsible%open] -======= - -`total`:: -(<>) Total amount of physical memory. - -`total_in_bytes`:: -(integer) Total amount of physical memory in bytes. - -`adjusted_total`:: -(<>) If the amount of physical memory has been overridden using the `es.total_memory_bytes` -system property then this reports the overridden value. -Otherwise it reports the same value as `total`. - -`adjusted_total_in_bytes`:: -(integer) If the amount of physical memory has been overridden using the `es.total_memory_bytes` -system property then this reports the overridden value in bytes. -Otherwise it reports the same value as `total_in_bytes`. - -`free`:: -(<>) Amount of free physical memory. - -`free_in_bytes`:: -(integer) Amount of free physical memory in bytes. - -`used`:: -(<>) Amount of used physical memory. - -`used_in_bytes`:: -(integer) Amount of used physical memory in bytes. - -`free_percent`:: -(integer) Percentage of free memory. - -`used_percent`:: -(integer) Percentage of used memory. - -======= - -`swap`:: -(object) Contains statistics about swap space for the node. 
-+ -.Properties of `swap` -[%collapsible%open] -======= - -`total`:: -(<>) Total amount of swap space. - -`total_in_bytes`:: -(integer) Total amount of swap space in bytes. - -`free`:: -(<>) Amount of free swap space. - -`free_in_bytes`:: -(integer) Amount of free swap space in bytes. - -`used`:: -(<>) Amount of used swap space. - -`used_in_bytes`:: -(integer) Amount of used swap space in bytes. - -======= - -`cgroup` (Linux only):: -(object) Contains cgroup statistics for the node. -+ -NOTE: For the cgroup stats to be visible, cgroups must be compiled into the kernel, the `cpu` and `cpuacct` cgroup subsystems must be configured and stats must be readable from `/sys/fs/cgroup/cpu` and `/sys/fs/cgroup/cpuacct`. -+ -.Properties of `cgroup` -[%collapsible%open] -======= - -`cpuacct` (Linux only):: -(object) Contains statistics about `cpuacct` control group for the node. -+ -.Properties of `cpuacct` -[%collapsible%open] -======== - -`control_group` (Linux only):: -(string) The `cpuacct` control group to which the {es} process belongs. - -`usage_nanos` (Linux only):: -(integer) The total CPU time (in nanoseconds) consumed by all tasks in the same cgroup as the {es} process. - -======== - -`cpu` (Linux only):: -(object) Contains statistics about `cpu` control group for the node. -+ -.Properties of `cpu` -[%collapsible%open] -======== - -`control_group` (Linux only):: -(string) The `cpu` control group to which the {es} process belongs. - -`cfs_period_micros` (Linux only):: -(integer) The period of time (in microseconds) for how regularly all tasks in the same cgroup as the {es} process should have their access to CPU resources reallocated. - -`cfs_quota_micros` (Linux only):: -(integer) The total amount of time (in microseconds) for which all tasks in the same cgroup as the {es} process can run during one period -`cfs_period_micros`. - -`stat` (Linux only):: -(object) Contains CPU statistics for the node. -+ -.Properties of `stat` -[%collapsible%open] -========= -`number_of_elapsed_periods` (Linux only):: -(integer) The number of reporting periods (as specified by -`cfs_period_micros`) that have elapsed. - -`number_of_times_throttled` (Linux only):: -(integer) The number of times all tasks in the same cgroup as the {es} process have been throttled. - -`time_throttled_nanos` (Linux only):: -(integer) The total amount of time (in nanoseconds) for which all tasks in the same cgroup as the {es} process have been throttled. - -========= -======== - -`memory` (Linux only):: -(object) Contains statistics about the `memory` control group for the node. -+ -.Properties of `memory` -[%collapsible%open] -======== - -`control_group` (Linux only):: -(string) The `memory` control group to which the {es} process belongs. - -`limit_in_bytes` (Linux only):: -(string) The maximum amount of user memory (including file cache) allowed for all tasks in the same cgroup as the {es} process. -This value can be too big to store in a `long`, so is returned as a string so that the value returned can exactly match what the underlying operating system interface returns. -Any value that is too large to parse into a `long` almost certainly means no limit has been set for the cgroup. - -`usage_in_bytes` (Linux only):: -(string) The total current memory usage by processes in the cgroup (in bytes) by all tasks in the same cgroup as the {es} process. -This value is stored as a string for consistency with `limit_in_bytes`. 
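-
-As an illustrative sketch only (all values are invented; the point of interest is that the two `memory` fields are rendered as strings), the `cgroup` object might look like this on a Linux node:
-
-[source,js]
-----
-"cgroup": {
-  "cpuacct": {
-    "control_group": "/",
-    "usage_nanos": 1394207347000
-  },
-  "cpu": {
-    "control_group": "/",
-    "cfs_period_micros": 100000,
-    "cfs_quota_micros": -1,
-    "stat": {
-      "number_of_elapsed_periods": 0,
-      "number_of_times_throttled": 0,
-      "time_throttled_nanos": 0
-    }
-  },
-  "memory": {
-    "control_group": "/",
-    "limit_in_bytes": "9223372036854771712",
-    "usage_in_bytes": "4796416000"
-  }
-}
-----
-// NOTCONSOLE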
- -======== -======= -====== - -[[cluster-nodes-stats-api-response-body-process]] -`process`:: -(object) Contains process statistics for the node. -+ -.Properties of `process` -[%collapsible%open] -====== - -`timestamp`:: -(integer) Last time the statistics were refreshed. -Recorded in milliseconds since the {wikipedia}/Unix_time[Unix Epoch]. - -`open_file_descriptors`:: -(integer) Number of opened file descriptors associated with the current or -`-1` if not supported. - -`max_file_descriptors`:: -(integer) Maximum number of file descriptors allowed on the system, or `-1` if not supported. - -`cpu`:: -(object) Contains CPU statistics for the node. -+ -.Properties of `cpu` -[%collapsible%open] -======= - -`percent`:: -(integer) CPU usage in percent, or `-1` if not known at the time the stats are computed. - -`total`:: -(<>) CPU time used by the process on which the Java virtual machine is running. - -`total_in_millis`:: -(integer) CPU time (in milliseconds) used by the process on which the Java virtual machine is running, or `-1` if not supported. - -======= - -`mem`:: -(object) Contains virtual memory statistics for the node. -+ -.Properties of `mem` -[%collapsible%open] -======= - -`total_virtual`:: -(<>) Size of virtual memory that is guaranteed to be available to the running process. - -`total_virtual_in_bytes`:: -(integer) Size in bytes of virtual memory that is guaranteed to be available to the running process. - -======= -====== - -[[cluster-nodes-stats-api-response-body-jvm]] -`jvm`:: -(object) Contains Java Virtual Machine (JVM) statistics for the node. -+ -.Properties of `jvm` -[%collapsible%open] -====== - -`timestamp`:: -(integer) Last time JVM statistics were refreshed. - -`uptime`:: -(<>) Human-readable JVM uptime. -Only returned if the -<<_human_readable_output,`human`>> query parameter is `true`. - -`uptime_in_millis`:: -(integer) JVM uptime in milliseconds. - -`mem`:: -(object) Contains JVM memory usage statistics for the node. -+ -.Properties of `mem` -[%collapsible%open] -======= - -`heap_used`:: -(<>) Memory currently in use by the heap. - -`heap_used_in_bytes`:: -(integer) Memory, in bytes, currently in use by the heap. - -`heap_used_percent`:: -(integer) Percentage of memory currently in use by the heap. - -`heap_committed`:: -(<>) Amount of memory available for use by the heap. - -`heap_committed_in_bytes`:: -(integer) Amount of memory, in bytes, available for use by the heap. - -`heap_max`:: -(<>) Maximum amount of memory available for use by the heap. - -`heap_max_in_bytes`:: -(integer) Maximum amount of memory, in bytes, available for use by the heap. - -`non_heap_used`:: -(<>) Non-heap memory used. - -`non_heap_used_in_bytes`:: -(integer) Non-heap memory used, in bytes. - -`non_heap_committed`:: -(<>) Amount of non-heap memory available. - -`non_heap_committed_in_bytes`:: -(integer) Amount of non-heap memory available, in bytes. - -`pools`:: -(object) Contains statistics about heap memory usage for the node. -+ -.Properties of `pools` -[%collapsible%open] -======== - -`young`:: -(object) Contains statistics about memory usage by the young generation heap for the node. -+ -.Properties of `young` -[%collapsible%open] -========= - -`used`:: -(<>) Memory used by the young generation heap. - -`used_in_bytes`:: -(integer) Memory, in bytes, used by the young generation heap. - -`max`:: -(<>) Maximum amount of memory available for use by the young generation heap. 
-
-`max_in_bytes`::
-(integer) Maximum amount of memory, in bytes, available for use by the young generation heap.
-
-`peak_used`::
-(<>) Largest amount of memory historically used by the young generation heap.
-
-`peak_used_in_bytes`::
-(integer) Largest amount of memory, in bytes, historically used by the young generation heap.
-
-`peak_max`::
-(<>) Highest memory limit historically available for use by the young generation heap.
-
-`peak_max_in_bytes`::
-(integer) Highest memory limit, in bytes, historically available for use by the young generation heap.
-
-=========
-
-`survivor`::
-(object) Contains statistics about memory usage by the survivor space for the node.
-+
-.Properties of `survivor`
-[%collapsible%open]
-=========
-
-`used`::
-(<>) Memory used by the survivor space.
-
-`used_in_bytes`::
-(integer) Memory, in bytes, used by the survivor space.
-
-`max`::
-(<>) Maximum amount of memory available for use by the survivor space.
-
-`max_in_bytes`::
-(integer) Maximum amount of memory, in bytes, available for use by the survivor space.
-
-`peak_used`::
-(<>) Largest amount of memory historically used by the survivor space.
-
-`peak_used_in_bytes`::
-(integer) Largest amount of memory, in bytes, historically used by the survivor space.
-
-`peak_max`::
-(<>) Highest memory limit historically available for use by the survivor space.
-
-`peak_max_in_bytes`::
-(integer) Highest memory limit, in bytes, historically available for use by the survivor space.
-
-=========
-
-`old`::
-(object) Contains statistics about memory usage by the old generation heap for the node.
-+
-.Properties of `old`
-[%collapsible%open]
-=========
-
-`used`::
-(<>) Memory used by the old generation heap.
-
-`used_in_bytes`::
-(integer) Memory, in bytes, used by the old generation heap.
-
-`max`::
-(<>) Maximum amount of memory available for use by the old generation heap.
-
-`max_in_bytes`::
-(integer) Maximum amount of memory, in bytes, available for use by the old generation heap.
-
-`peak_used`::
-(<>) Largest amount of memory historically used by the old generation heap.
-
-`peak_used_in_bytes`::
-(integer) Largest amount of memory, in bytes, historically used by the old generation heap.
-
-`peak_max`::
-(<>) Highest memory limit historically available for use by the old generation heap.
-
-`peak_max_in_bytes`::
-(integer) Highest memory limit, in bytes, historically available for use by the old generation heap.
-
-=========
-========
-=======
-
-`threads`::
-(object) Contains statistics about JVM thread usage for the node.
-+
-.Properties of `threads`
-[%collapsible%open]
-=======
-
-`count`::
-(integer) Number of active threads in use by JVM.
-
-`peak_count`::
-(integer) Highest number of threads used by JVM.
-
-=======
-
-`gc`::
-(object) Contains statistics about JVM garbage collectors for the node.
-+
-.Properties of `gc`
-[%collapsible%open]
-=======
-
-`collectors`::
-(object) Contains statistics about JVM garbage collectors for the node.
-+
-.Properties of `collectors`
-[%collapsible%open]
-========
-
-`young`::
-(object) Contains statistics about JVM garbage collectors that collect young generation objects for the node.
-+
-.Properties of `young`
-[%collapsible%open]
-=========
-
-`collection_count`::
-(integer) Total number of garbage collections performed by collectors that collect young generation objects.
-
-`collection_time`::
-(<>) Total time spent by JVM collecting young generation objects.
-
-`collection_time_in_millis`::
-(integer) Total time in milliseconds spent by JVM collecting young generation objects.
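-
-For example (values invented; the human-readable `collection_time` field is typically only returned when the `human` query parameter is `true`), a single collector entry has this shape:
-
-[source,js]
-----
-"young": {
-  "collection_count": 42,
-  "collection_time": "1.2s",
-  "collection_time_in_millis": 1250
-}
-----
-// NOTCONSOLE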
-
-=========
-
-`old`::
-(object) Contains statistics about JVM garbage collectors that collect old generation objects for the node.
-+
-.Properties of `old`
-[%collapsible%open]
-=========
-
-`collection_count`::
-(integer) Total number of garbage collections performed by collectors that collect old generation objects.
-
-`collection_time`::
-(<>) Total time spent by JVM collecting old generation objects.
-
-`collection_time_in_millis`::
-(integer) Total time in milliseconds spent by JVM collecting old generation objects.
-
-=========
-========
-=======
-
-`buffer_pools`::
-(object) Contains statistics about JVM buffer pools for the node.
-+
-.Properties of `buffer_pools`
-[%collapsible%open]
-=======
-
-`mapped`::
-(object) Contains statistics about mapped JVM buffer pools for the node.
-+
-.Properties of `mapped`
-[%collapsible%open]
-========
-
-`count`::
-(integer) Number of mapped buffer pools.
-
-`used`::
-(<>) Size of mapped buffer pools.
-
-`used_in_bytes`::
-(integer) Size, in bytes, of mapped buffer pools.
-
-`total_capacity`::
-(<>) Total capacity of mapped buffer pools.
-
-`total_capacity_in_bytes`::
-(integer) Total capacity, in bytes, of mapped buffer pools.
-
-========
-
-`direct`::
-(object) Contains statistics about direct JVM buffer pools for the node.
-+
-.Properties of `direct`
-[%collapsible%open]
-========
-
-`count`::
-(integer) Number of direct buffer pools.
-
-`used`::
-(<>) Size of direct buffer pools.
-
-`used_in_bytes`::
-(integer) Size, in bytes, of direct buffer pools.
-
-`total_capacity`::
-(<>) Total capacity of direct buffer pools.
-
-`total_capacity_in_bytes`::
-(integer) Total capacity, in bytes, of direct buffer pools.
-
-========
-=======
-
-`classes`::
-(object) Contains statistics about classes loaded by JVM for the node.
-+
-.Properties of `classes`
-[%collapsible%open]
-=======
-
-`current_loaded_count`::
-(integer) Number of classes currently loaded by JVM.
-
-`total_loaded_count`::
-(integer) Total number of classes loaded since the JVM started.
-
-`total_unloaded_count`::
-(integer) Total number of classes unloaded since the JVM started.
-
-=======
-======
-
-[[cluster-nodes-stats-api-response-body-repositories]]
-`repositories`::
-(object) Statistics about snapshot repositories.
-+
-.Properties of `repositories`
-[%collapsible%open]
-======
-
-``::
-(object) Contains repository throttling statistics for the node.
-+
-.Properties of ``
-[%collapsible%open]
-=======
-
-`total_read_throttled_time_nanos`::
-(integer) Total number of nanoseconds the node had to wait during recovery due to throttling.
-
-`total_write_throttled_time_nanos`::
-(integer) Total number of nanoseconds the node had to wait during snapshotting due to throttling.
-
-=======
-======
-
-[[cluster-nodes-stats-api-response-body-threadpool]]
-`thread_pool`::
-(object) Contains thread pool statistics for the node.
-+
-.Properties of `thread_pool`
-[%collapsible%open]
-======
-
-``::
-(object) Contains statistics about the thread pool for the node.
-+
-.Properties of ``
-[%collapsible%open]
-=======
-
-`threads`::
-(integer) Number of threads in the thread pool.
-
-`queue`::
-(integer) Number of tasks in queue for the thread pool.
-
-`active`::
-(integer) Number of active threads in the thread pool.
-
-`rejected`::
-(integer) Number of tasks rejected by the thread pool executor.
-
-`largest`::
-(integer) Highest number of active threads in the thread pool.
-
-`completed`::
-(integer) Number of tasks completed by the thread pool executor.
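-
-For instance, a single thread pool entry (here the `write` pool, with invented values) has the following shape:
-
-[source,js]
-----
-"write": {
-  "threads": 8,
-  "queue": 0,
-  "active": 1,
-  "rejected": 0,
-  "largest": 8,
-  "completed": 1045790
-}
-----
-// NOTCONSOLE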
- -======= -====== - -[[cluster-nodes-stats-api-response-body-fs]] -`fs`:: -(object) Contains file store statistics for the node. -+ -.Properties of `fs` -[%collapsible%open] -====== - -`timestamp`:: -(integer) Last time the file stores statistics were refreshed. -Recorded in milliseconds since the {wikipedia}/Unix_time[Unix Epoch]. - -`total`:: -(object) Contains statistics for all file stores of the node. -+ -.Properties of `total` -[%collapsible%open] -======= - -`total`:: -(<>) Total size of all file stores. - -`total_in_bytes`:: -(integer) Total size (in bytes) of all file stores. - -`free`:: -(<>) Total unallocated disk space in all file stores. - -`free_in_bytes`:: -(integer) Total number of unallocated bytes in all file stores. - -`available`:: -(<>) Total disk space available to this Java virtual machine on all file stores. -Depending on OS or process level restrictions (e.g. XFS quotas), this might appear less than `free`. -This is the actual amount of free disk space the {es} node can utilise. - -`available_in_bytes`:: -(integer) Total number of bytes available to this Java virtual machine on all file stores. -Depending on OS or process level restrictions (e.g. XFS quotas), this might appear less than `free_in_bytes`. -This is the actual amount of free disk space the {es} node can utilise. - -======= - -[[cluster-nodes-stats-fs-data]] -`data`:: -(array of objects) List of all file stores. -+ -.Properties of `data` -[%collapsible%open] -======= - -`path`:: -(string) Path to the file store. - -`mount`:: -(string) Mount point of the file store (ex: /dev/sda2). - -`type`:: -(string) Type of the file store (ex: ext4). - -`total`:: -(<>) Total size of the file store. - -`total_in_bytes`:: -(integer) Total size (in bytes) of the file store. - -`free`:: -(<>) Total amount of unallocated disk space in the file store. - -`free_in_bytes`:: -(integer) Total number of unallocated bytes in the file store. - -`available`:: -(<>) Total amount of disk space available to this Java virtual machine on this file store. - -`available_in_bytes`:: -(integer) Total number of bytes available to this Java virtual machine on this file store. - -`low_watermark_free_space`:: -(<>) The effective low disk watermark for this data path on this node: when a node has less free space than this value for at least one data path, its disk usage has exceeded the low watermark. -See <> for more information about disk watermarks and their effects on shard allocation. - -`low_watermark_free_space_in_bytes`:: -(integer) The effective low disk watermark, in bytes, for this data path on this node: -when a node has less free space than this value for at least one data path, its disk usage has exceeded the low watermark. -See <> -for more information about disk watermarks and their effects on shard allocation. - -`high_watermark_free_space`:: -(<>) The effective high disk watermark for this data path on this node: when a node has less free space than this value for at least one data path, its disk usage has exceeded the high watermark. -See <> for more information about disk watermarks and their effects on shard allocation. - -`high_watermark_free_space_in_bytes`:: -(integer) The effective high disk watermark, in bytes, for this data path on this node: -when a node has less free space than this value for at least one data path, its disk usage has exceeded the high watermark. -See <> -for more information about disk watermarks and their effects on shard allocation. 
- -`flood_stage_free_space`:: -(<>) The effective flood stage disk watermark for this data path on this node: when a node has less free space than this value for at least one data path, its disk usage has exceeded the flood stage watermark. -See -<> for more information about disk watermarks and their effects on shard allocation. - -`flood_stage_free_space_in_bytes`:: -(integer) The effective flood stage disk watermark, in bytes, for this data path on this node: when a node has less free space than this value for at least one data path, its disk usage has exceeded the flood stage watermark. -See -<> for more information about disk watermarks and their effects on shard allocation. - -`frozen_flood_stage_free_space`:: -(<>) The effective flood stage disk watermark for this data path on a dedicated frozen node: when a dedicated frozen node has less free space than this value for at least one data path, its disk usage has exceeded the flood stage watermark. -See <> for more information about disk watermarks and their effects on shard allocation. - -`frozen_flood_stage_free_space_in_bytes`:: -(integer) The effective flood stage disk watermark, in bytes, for this data path on a dedicated frozen node: when a dedicated frozen node has less free space than this value for at least one data path, its disk usage has exceeded the flood stage watermark. -See <> for more information about disk watermarks and their effects on shard allocation. - -======= - -`io_stats` (Linux only):: -(objects) Contains I/O statistics for the node. - -NOTE: These statistics are derived from the `/proc/diskstats` kernel interface. -This interface accounts for IO performed by all processes on the system, even -if you are running {es} within a container. -+ -.Properties of `io_stats` -[%collapsible%open] -======= - -`devices` (Linux only):: -(array) Array of disk metrics for each device that is backing an {es} data path. -These disk metrics are probed periodically and averages between the last probe and the current probe are computed. -+ -.Properties of `devices` -[%collapsible%open] -======== - -`device_name` (Linux only):: -(string) The Linux device name. - -`operations` (Linux only):: -(integer) The total number of read and write operations for the device completed since starting {es}. - -`read_operations` (Linux only):: -(integer) The total number of read operations for the device completed since starting -{es}. - -`write_operations` (Linux only):: -(integer) The total number of write operations for the device completed since starting -{es}. - -`read_kilobytes` (Linux only):: -(integer) The total number of kilobytes read for the device since starting {es}. - -`write_kilobytes` (Linux only):: -(integer) The total number of kilobytes written for the device since starting {es}. - -`io_time_in_millis` (Linux only):: -(integer) The total time in milliseconds spent performing I/O operations for the device since starting {es}. - -======== - -`total` (Linux only):: -(object) The sum of the disk metrics for all devices that back an {es} data path. -+ -.Properties of `total` -[%collapsible%open] -======== - -`operations` (Linux only):: -(integer) The total number of read and write operations across all devices used by -{es} completed since starting {es}. - -`read_operations` (Linux only):: -(integer) The total number of read operations for across all devices used by {es} -completed since starting {es}. 
- -`write_operations` (Linux only):: -(integer) The total number of write operations across all devices used by {es} -completed since starting {es}. - -`read_kilobytes` (Linux only):: -(integer) The total number of kilobytes read across all devices used by {es} since starting {es}. - -`write_kilobytes` (Linux only):: -(integer) The total number of kilobytes written across all devices used by {es} since starting {es}. - -`io_time_in_millis` (Linux only):: -(integer) The total time in milliseconds spent performing I/O operations across all devices used by {es} since starting {es}. - -======== - -======= -====== - -[[cluster-nodes-stats-api-response-body-transport]] -`transport`:: -(object) Contains transport statistics for the node. -+ -.Properties of `transport` -[%collapsible%open] -====== - -`server_open`:: -(integer) Current number of inbound TCP connections used for internal communication between nodes. - -`total_outbound_connections`:: -(integer) The cumulative number of outbound transport connections that this node has opened since it started. -Each transport connection may comprise multiple TCP connections but is only counted once in this statistic. -Transport connections are typically <> so this statistic should remain constant in a stable cluster. - -`rx_count`:: -(integer) Total number of RX (receive) packets received by the node during internal cluster communication. - -`rx_size`:: -(<>) Size of RX packets received by the node during internal cluster communication. - -`rx_size_in_bytes`:: -(integer) Size, in bytes, of RX packets received by the node during internal cluster communication. - -`tx_count`:: -(integer) Total number of TX (transmit) packets sent by the node during internal cluster communication. - -`tx_size`:: -(<>) Size of TX packets sent by the node during internal cluster communication. - -`tx_size_in_bytes`:: -(integer) Size, in bytes, of TX packets sent by the node during internal cluster communication. - -`inbound_handling_time_histogram`:: -(array) The distribution of the time spent handling each inbound message on a transport thread, represented as a histogram. -+ -.Properties of `inbound_handling_time_histogram` -[%collapsible] -======= - -`ge`:: -(string) The inclusive lower bound of the bucket as a human-readable string. -May be omitted on the first bucket if this bucket has no lower bound. - -`ge_millis`:: -(integer) The inclusive lower bound of the bucket in milliseconds. -May be omitted on the first bucket if this bucket has no lower bound. - -`lt`:: -(string) The exclusive upper bound of the bucket as a human-readable string. -May be omitted on the last bucket if this bucket has no upper bound. - -`lt_millis`:: -(integer) The exclusive upper bound of the bucket in milliseconds. -May be omitted on the last bucket if this bucket has no upper bound. - -`count`:: -(integer) The number of times a transport thread took a period of time within the bounds of this bucket to handle an inbound message. - -======= - -`outbound_handling_time_histogram`:: -(array) The distribution of the time spent sending each outbound transport message on a transport thread, represented as a histogram. -+ -.Properties of `outbound_handling_time_histogram` -[%collapsible] -======= - -`ge`:: -(string) The inclusive lower bound of the bucket as a human-readable string. -May be omitted on the first bucket if this bucket has no lower bound. - -`ge_millis`:: -(integer) The inclusive lower bound of the bucket in milliseconds. 
-May be omitted on the first bucket if this bucket has no lower bound. - -`lt`:: -(string) The exclusive upper bound of the bucket as a human-readable string. -May be omitted on the last bucket if this bucket has no upper bound. - -`lt_millis`:: -(integer) The exclusive upper bound of the bucket in milliseconds. -May be omitted on the last bucket if this bucket has no upper bound. - -`count`:: -(integer) The number of times a transport thread took a period of time within the bounds of this bucket to send a transport message. - -======= - -`actions`:: -(object) An action-by-action breakdown of the transport traffic handled by this node, showing the total amount of traffic and a histogram of message sizes for incoming requests and outgoing responses. -+ -.Properties of `actions.*.requests` and `actions.*.responses` -[%collapsible] -======= - -`count`:: -(integer) The total number of requests received, or responses sent, for the current action. - -`total_size`:: -(<>) The total size (as a human-readable string) of all requests received, or responses sent, for the current action. - -`total_size_in_bytes`:: -(integer) The total size in bytes of all requests received, or responses sent, for the current action. - -`histogram`:: -(array) A breakdown of the distribution of sizes of requests received, or responses sent, for the current action. -+ -.Properties of `histogram` -[%collapsible] -======== - -`ge`:: -(<>) The inclusive lower bound of the bucket as a human-readable string. -May be omitted on the first bucket if this bucket has no lower bound. - -`ge_bytes`:: -(integer) The inclusive lower bound of the bucket in bytes. -May be omitted on the first bucket if this bucket has no lower bound. - -`lt`:: -(<>) The exclusive upper bound of the bucket as a human-readable string. -May be omitted on the last bucket if this bucket has no upper bound. - -`lt_bytes`:: -(integer) The exclusive upper bound of the bucket in bytes. -May be omitted on the last bucket if this bucket has no upper bound. - -`count`:: -(integer) The number of times a request was received, or a response sent, with a size within the bounds of this bucket. - -======== -======= - -====== - -[[cluster-nodes-stats-api-response-body-http]] -`http`:: -(object) Contains http statistics for the node. -+ -.Properties of `http` -[%collapsible%open] -====== - -`current_open`:: -(integer) Current number of open HTTP connections for the node. - -`total_opened`:: -(integer) Total number of HTTP connections opened for the node. - -`clients`:: -(array of objects) Information on current and recently-closed HTTP client connections. -Clients that have been closed longer than the <> -setting will not be represented here. -+ -.Properties of `clients` -[%collapsible%open] -======= - -`id`:: -(integer) Unique ID for the HTTP client. - -`agent`:: -(string) Reported agent for the HTTP client. -If unavailable, this property is not included in the response. - -`local_address`:: -(string) Local address for the HTTP connection. - -`remote_address`:: -(string) Remote address for the HTTP connection. - -`last_uri`:: -(string) The URI of the client's most recent request. - -`x_forwarded_for`:: -(string) Value from the client's `x-forwarded-for` HTTP header. -If unavailable, this property is not included in the response. - -`x_opaque_id`:: -(string) Value from the client's `x-opaque-id` HTTP header. -If unavailable, this property is not included in the response. - -`opened_time_millis`:: -(integer) Time at which the client opened the connection. 
-
-`closed_time_millis`::
-(integer) Time at which the client closed the connection if the connection is closed.
-
-`last_request_time_millis`::
-(integer) Time of the most recent request from this client.
-
-`request_count`::
-(integer) Number of requests from this client.
-
-`request_size_bytes`::
-(integer) Cumulative size in bytes of all requests from this client.
-
-=======
-======
-
-[[cluster-nodes-stats-api-response-body-breakers]]
-`breakers`::
-(object) Contains circuit breaker statistics for the node.
-+
-.Properties of `breakers`
-[%collapsible%open]
-======
-
-``::
-(object) Contains statistics for the circuit breaker.
-+
-.Properties of ``
-[%collapsible%open]
-=======
-
-`limit_size_in_bytes`::
-(integer) Memory limit, in bytes, for the circuit breaker.
-
-`limit_size`::
-(<>) Memory limit for the circuit breaker.
-
-`estimated_size_in_bytes`::
-(integer) Estimated memory used, in bytes, for the operation.
-
-`estimated_size`::
-(<>) Estimated memory used for the operation.
-
-`overhead`::
-(float) A constant that all estimates for the circuit breaker are multiplied with to calculate a final estimate.
-
-`tripped`::
-(integer) Total number of times the circuit breaker has been triggered and prevented an out of memory error.
-
-=======
-======
-
-[[cluster-nodes-stats-api-response-body-script]]
-`script`::
-(object) Contains script statistics for the node.
-+
-.Properties of `script`
-[%collapsible%open]
-======
-
-`compilations`::
-(integer) Total number of inline script compilations performed by the node.
-
-`compilations_history`::
-(object) Contains the recent history of script compilations.
-+
-.Properties of `compilations_history`
-[%collapsible%open]
-=======
-
-`5m`::
-(long) The number of script compilations in the last five minutes.
-`15m`::
-(long) The number of script compilations in the last fifteen minutes.
-`24h`::
-(long) The number of script compilations in the last twenty-four hours.
-
-=======
-
-`cache_evictions`::
-(integer) Total number of times the script cache has evicted old data.
-
-`cache_evictions_history`::
-(object) Contains the recent history of script cache evictions.
-+
-.Properties of `cache_evictions_history`
-[%collapsible%open]
-=======
-`5m`::
-(long) The number of script cache evictions in the last five minutes.
-`15m`::
-(long) The number of script cache evictions in the last fifteen minutes.
-`24h`::
-(long) The number of script cache evictions in the last twenty-four hours.
-=======
-
-`compilation_limit_triggered`::
-(integer) Total number of times the <> circuit breaker has limited inline script compilations.
-
-======
-
-[[cluster-nodes-stats-api-response-body-discovery]]
-`discovery`::
-(object) Contains node discovery statistics for the node.
-+
-.Properties of `discovery`
-[%collapsible%open]
-======
-
-`cluster_state_queue`::
-(object) Contains statistics for the cluster state queue of the node.
-+
-.Properties of `cluster_state_queue`
-[%collapsible%open]
-=======
-`total`::
-(integer) Total number of cluster states in queue.
-
-`pending`::
-(integer) Number of pending cluster states in queue.
-
-`committed`::
-(integer) Number of committed cluster states in queue.
-
-=======
-
-`published_cluster_states`::
-(object) Contains statistics for the published cluster states of the node.
-+
-.Properties of `published_cluster_states`
-[%collapsible%open]
-=======
-
-`full_states`::
-(integer) Number of published cluster states.
-
-`incompatible_diffs`::
-(integer) Number of incompatible differences between published cluster states.
- -`compatible_diffs`:: -(integer) Number of compatible differences between published cluster states. - -======= - -`cluster_state_update`:: -(object) Contains low-level statistics about how long various activities took during cluster state updates while the node was the elected master. -Omitted if the node is not master-eligible. -Every field whose name ends in `_time` within this object is also represented as a raw number of milliseconds in a field whose name ends in `_time_millis`. -The human-readable fields with a `_time` suffix are only returned if requested with the `?human=true` query parameter. -+ -.Properties of `cluster_state_update` -[%collapsible] -======= - -`unchanged`:: -(object) Contains statistics about cluster state update attempts that did not change the cluster state. -+ -.Properties of `unchanged` -[%collapsible] -======== - -`count`:: -(long) The number of cluster state update attempts that did not change the cluster state since the node started. - -`computation_time`:: -(<>) The cumulative amount of time spent computing no-op cluster state updates since the node started. - -`notification_time`:: -(<>) The cumulative amount of time spent notifying listeners of a no-op cluster state update since the node started. - -======== - -`success`:: -(object) Contains statistics about cluster state update attempts that successfully changed the cluster state. -+ -.Properties of `success` -[%collapsible] -======== - -`count`:: -(long) The number of cluster state update attempts that successfully changed the cluster state since the node started. - -`computation_time`:: -(<>) The cumulative amount of time spent computing cluster state updates that were ultimately successful since the node started. - -`publication_time`:: -(<>) The cumulative amount of time spent publishing cluster state updates which ultimately succeeded, which includes everything from the start of the publication (i.e. just after the computation of the new cluster state) until the publication has finished and the master node is ready to start processing the next state update. -This includes the time measured by -`context_construction_time`, `commit_time`, `completion_time` and -`master_apply_time`. - -`context_construction_time`:: -(<>) The cumulative amount of time spent constructing a _publication context_ since the node started for publications that ultimately succeeded. -This statistic includes the time spent computing the difference between the current and new cluster state preparing a serialized representation of this difference. - -`commit_time`:: -(<>) The cumulative amount of time spent waiting for a successful cluster state update to _commit_, which measures the time from the start of each publication until a majority of the master-eligible nodes have written the state to disk and confirmed the write to the elected master. - -`completion_time`:: -(<>) The cumulative amount of time spent waiting for a successful cluster state update to _complete_, which measures the time from the start of each publication until all the other nodes have notified the elected master that they have applied the cluster state. - -`master_apply_time`:: -(<>) The cumulative amount of time spent successfully applying cluster state updates on the elected master since the node started. - -`notification_time`:: -(<>) The cumulative amount of time spent notifying listeners of a successful cluster state update since the node started. 
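-
-As a sketch of how these statistics render (values invented; per the note above, each `*_time` field also has a raw `*_time_millis` counterpart, and the human-readable form is only returned when the `human` query parameter is set):
-
-[source,js]
-----
-"success": {
-  "count": 1403,
-  "computation_time_millis": 5042,
-  "publication_time_millis": 9923,
-  "context_construction_time_millis": 203,
-  "commit_time_millis": 4328,
-  "completion_time_millis": 8261,
-  "master_apply_time_millis": 1422,
-  "notification_time_millis": 811
-}
-----
-// NOTCONSOLE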
- -======== - -`failure`:: -(object) Contains statistics about cluster state update attempts that did not successfully change the cluster state, typically because a new master node was elected before completion. -+ -.Properties of `failure` -[%collapsible] -======== - -`count`:: -(long) The number of cluster state update attempts that failed to change the cluster state since the node started. - -`computation_time`:: -(<>) The cumulative amount of time spent computing cluster state updates that were ultimately unsuccessful since the node started. - -`publication_time`:: -(<>) The cumulative amount of time spent publishing cluster state updates which ultimately failed, which includes everything from the start of the publication (i.e. just after the computation of the new cluster state) until the publication has finished and the master node is ready to start processing the next state update. -This includes the time measured by -`context_construction_time`, `commit_time`, `completion_time` and -`master_apply_time`. - -`context_construction_time`:: -(<>) The cumulative amount of time spent constructing a _publication context_ since the node started for publications that ultimately failed. -This statistic includes the time spent computing the difference between the current and new cluster state preparing a serialized representation of this difference. - -`commit_time`:: -(<>) The cumulative amount of time spent waiting for an unsuccessful cluster state update to _commit_, which measures the time from the start of each publication until a majority of the master-eligible nodes have written the state to disk and confirmed the write to the elected master. - -`completion_time`:: -(<>) The cumulative amount of time spent waiting for an unsuccessful cluster state update to _complete_, which measures the time from the start of each publication until all the other nodes have notified the elected master that they have applied the cluster state. - -`master_apply_time`:: -(<>) The cumulative amount of time spent unsuccessfully applying cluster state updates on the elected master since the node started. - -`notification_time`:: -(<>) The cumulative amount of time spent notifying listeners of a failed cluster state update since the node started. - -======== -======= -====== - -[[cluster-nodes-stats-api-response-body-ingest]] -`ingest`:: -(object) Contains ingest statistics for the node. -+ -.Properties of `ingest` -[%collapsible%open] -====== - -`total`:: -(object) Contains statistics about ingest operations for the node. -+ -.Properties of `total` -[%collapsible%open] -======= - -`count`:: -(integer) Total number of documents ingested during the lifetime of this node. - -`time`:: -(<>) Total time spent preprocessing ingest documents during the lifetime of this node. - -`time_in_millis`:: -(integer) Total time, in milliseconds, spent preprocessing ingest documents during the lifetime of this node. - -`current`:: -(integer) Total number of documents currently being ingested. - -`failed`:: -(integer) Total number of failed ingest operations during the lifetime of this node. - -======= - -`pipelines`:: -(object) Contains statistics about ingest pipelines for the node. -+ -.Properties of `pipelines` -[%collapsible%open] -======= - -``:: -(object) Contains statistics about the ingest pipeline. -+ -.Properties of `` -[%collapsible%open] -======== - -`count`:: -(integer) Number of documents preprocessed by the ingest pipeline. - -`time`:: -(<>) Total time spent preprocessing documents in the ingest pipeline. 
- -`time_in_millis`:: -(integer) Total time, in milliseconds, spent preprocessing documents in the ingest pipeline. - -`failed`:: -(integer) Total number of failed operations for the ingest pipeline. - -`ingested_as_first_pipeline`:: -(<>) -Total ingested size of all documents which were first processed by this pipeline. -A document is not added to the stat value for this pipeline if it is a final pipeline after a default pipeline, a pipeline -run after a reroute processor, or is within a pipeline processor. -Instead, the document size is added to the stat value of the pipeline which initially ingested the document. - -`ingested_as_first_pipeline_in_bytes`:: -(integer) -Total ingested size, in bytes, of all documents which were first processed by this pipeline. -A document is not added to the stat value for this pipeline if it is a final pipeline after a default pipeline, a pipeline -run after a reroute processor, or is within a pipeline processor. -Instead, the document size is added to the stat value of the pipeline which initially ingested the document. - -`produced_as_first_pipeline`:: -(<>) -Total produced size of all documents which were first processed by this pipeline. -A document is not added to the stat value for this pipeline if it is a final pipeline after a default pipeline, a pipeline -run after a reroute processor, or is within a pipeline processor. -Instead, the document size is added to the stat value of the pipeline which initially ingested the document. - -`produced_as_first_pipeline_in_bytes`:: -(integer) -Total produced size, in bytes, of all documents which were first processed by this pipeline. -A document is not added to the stat value for this pipeline if it is a final pipeline after a default pipeline, a pipeline -run after a reroute processor, or is within a pipeline processor. -Instead, the document size is added to the stat value of the pipeline which initially ingested the document. - -`processors`:: -(array of objects) Contains statistics for the ingest processors for the ingest pipeline. -+ -.Properties of `processors` -[%collapsible%open] -========= - -``:: -(object) Contains statistics for the ingest processor. -+ -.Properties of `` -[%collapsible%open] -========== -`count`:: -(integer) Number of documents transformed by the processor. - -`time`:: -(<>) Time spent by the processor transforming documents. - -`time_in_millis`:: -(integer) Time, in milliseconds, spent by the processor transforming documents. - -`current`:: -(integer) Number of documents currently being transformed by the processor. - -`failed`:: -(integer) Number of failed operations for the processor. - -========== -========= -======== -======= -====== - -[[cluster-nodes-stats-api-response-body-indexing-pressure]] -`indexing_pressure`:: -(object) Contains <> statistics for the node. -+ -.Properties of `indexing_pressure` -[%collapsible%open] -====== - -`memory`:: -(object) Contains statistics for memory consumption from indexing load. -+ -.Properties of `` -[%collapsible%open] -======= - -`current`:: -(object) Contains statistics for current indexing load. -+ -.Properties of `` -[%collapsible%open] -======== - -`combined_coordinating_and_primary`:: -(<>) Memory consumed by indexing requests in the coordinating or primary stage. -This value is not the sum of coordinating and primary as a node can reuse the coordinating memory if the primary stage is executed locally. 
- -`combined_coordinating_and_primary_in_bytes`:: -(integer) Memory consumed, in bytes, by indexing requests in the coordinating or primary stage. -This value is not the sum of coordinating and primary as a node can reuse the coordinating memory if the primary stage is executed locally. - -`coordinating`:: -(<>) Memory consumed by indexing requests in the coordinating stage. - -`coordinating_in_bytes`:: -(integer) Memory consumed, in bytes, by indexing requests in the coordinating stage. - -`primary`:: -(<>) Memory consumed by indexing requests in the primary stage. - -`primary_in_bytes`:: -(integer) Memory consumed, in bytes, by indexing requests in the primary stage. - -`replica`:: -(<>) Memory consumed by indexing requests in the replica stage. - -`replica_in_bytes`:: -(integer) Memory consumed, in bytes, by indexing requests in the replica stage. - -`all`:: -(<>) Memory consumed by indexing requests in the coordinating, primary, or replica stage. - -`all_in_bytes`:: -(integer) Memory consumed, in bytes, by indexing requests in the coordinating, primary, or replica stage. - -======== -`total`:: -(object) Contains statistics for the cumulative indexing load since the node started. -+ -.Properties of `` -[%collapsible%open] -======== - -`combined_coordinating_and_primary`:: -(<>) Memory consumed by indexing requests in the coordinating or primary stage. -This value is not the sum of coordinating and primary as a node can reuse the coordinating memory if the primary stage is executed locally. - -`combined_coordinating_and_primary_in_bytes`:: -(integer) Memory consumed, in bytes, by indexing requests in the coordinating or primary stage. -This value is not the sum of coordinating and primary as a node can reuse the coordinating memory if the primary stage is executed locally. - -`coordinating`:: -(<>) Memory consumed by indexing requests in the coordinating stage. - -`coordinating_in_bytes`:: -(integer) Memory consumed, in bytes, by indexing requests in the coordinating stage. - -`primary`:: -(<>) Memory consumed by indexing requests in the primary stage. - -`primary_in_bytes`:: -(integer) Memory consumed, in bytes, by indexing requests in the primary stage. - -`replica`:: -(<>) Memory consumed by indexing requests in the replica stage. - -`replica_in_bytes`:: -(integer) Memory consumed, in bytes, by indexing requests in the replica stage. - -`all`:: -(<>) Memory consumed by indexing requests in the coordinating, primary, or replica stage. - -`all_in_bytes`:: -(integer) Memory consumed, in bytes, by indexing requests in the coordinating, primary, or replica stage. - -`coordinating_rejections`:: -(integer) Number of indexing requests rejected in the coordinating stage. - -`primary_rejections`:: -(integer) Number of indexing requests rejected in the primary stage. - -`replica_rejections`:: -(integer) Number of indexing requests rejected in the replica stage. - -======== -`limit`:: -(<>) Configured memory limit for the indexing requests. -Replica requests have an automatic limit that is 1.5x this value. - -`limit_in_bytes`:: -(integer) Configured memory limit, in bytes, for the indexing requests. -Replica requests have an automatic limit that is 1.5x this value. - -======= -====== - -[[cluster-nodes-stats-api-response-body-adaptive-selection]] -`adaptive_selection`:: -(object) Contains adaptive selection statistics for the node. 
-+ -.Properties of `adaptive_selection` -[%collapsible%open] -====== - -`outgoing_searches`:: -(integer) The number of outstanding search requests from the node these stats are for to the keyed node. - -`avg_queue_size`:: -(integer) The exponentially weighted moving average queue size of search requests on the keyed node. - -`avg_service_time`:: -(<>) The exponentially weighted moving average service time of search requests on the keyed node. - -`avg_service_time_ns`:: -(integer) The exponentially weighted moving average service time, in nanoseconds, of search requests on the keyed node. - -`avg_response_time`:: -(<>) The exponentially weighted moving average response time of search requests on the keyed node. - -`avg_response_time_ns`:: -(integer) The exponentially weighted moving average response time, in nanoseconds, of search requests on the keyed node. - -`rank`:: -(string) The rank of this node; used for shard selection when routing search requests. - -====== - -[[cluster-nodes-stats-api-response-body-allocations]] -`allocations`:: -(object) Contains allocations statistics for the node. -+ -.Properties of `allocations` -[%collapsible%open] -====== - -`shards`:: -(integer) The number of shards currently allocated to this node - -`undesired_shards`:: -(integer) The amount of shards that are scheduled to be moved elsewhere in the cluster if desired balance allocator is used or -1 if any other allocator is used. - -`forecasted_ingest_load`:: -(double) Total forecasted ingest load of all shards assigned to this node - -`forecasted_disk_usage`:: -(<>) Forecasted size of all shards assigned to the node - -`forecasted_disk_usage_bytes`:: -(integer) Forecasted size, in bytes, of all shards assigned to the node - -`current_disk_usage`:: -(<>) Current size of all shards assigned to the node - -`current_disk_usage_bytes`:: -(integer) Current size, in bytes, of all shards assigned to the node - -====== -===== -==== - -[[cluster-nodes-stats-api-example]] -==== {api-examples-title} - -[source,console,id=nodes-stats-limit] ----- -# return just indices -GET /_nodes/stats/indices - -# return just os and process -GET /_nodes/stats/os,process - -# return just process for node with IP address 10.0.0.1 -GET /_nodes/10.0.0.1/stats/process ----- - -All stats can be explicitly requested via `/_nodes/stats/_all` or -`/_nodes/stats?metric=_all`. - -You can get information about indices stats on `node`, `indices`, or `shards` -level. - -[source,console,id=nodes-stats-indices] ----- -# Fielddata summarized by node -GET /_nodes/stats/indices/fielddata?fields=field1,field2 - -# Fielddata summarized by node and index -GET /_nodes/stats/indices/fielddata?level=indices&fields=field1,field2 - -# Fielddata summarized by node, index, and shard -GET /_nodes/stats/indices/fielddata?level=shards&fields=field1,field2 - -# You can use wildcards for field names -GET /_nodes/stats/indices/fielddata?fields=field* ----- - -You can get statistics about search groups for searches executed on this node. - -[source,console,id=nodes-stats-groups] ----- -# All groups with all stats -GET /_nodes/stats?groups=_all - -# Some groups from just the indices stats -GET /_nodes/stats/indices?groups=foo,bar ----- - -[[cluster-nodes-stats-ingest-ex]] -===== Retrieve ingest statistics only - -To return only ingest-related node statistics, set the `` path parameter to `ingest` and use the -<> query parameter. 
- -[source,console,id=nodes-stats-filter-path] ----- -GET /_nodes/stats/ingest?filter_path=nodes.*.ingest ----- - -You can use the `metric` and `filter_path` query parameters to get the same response. - -[source,console,id=nodes-stats-metric-filter-path] ----- -GET /_nodes/stats?metric=ingest&filter_path=nodes.*.ingest ----- - -To further refine the response, change the `filter_path` value. -For example, the following request only returns ingest pipeline statistics. - -[source,console,id=nodes-stats-metric-filter-path-refined] ----- -GET /_nodes/stats?metric=ingest&filter_path=nodes.*.ingest.pipelines ----- diff --git a/docs/reference/cluster/nodes-usage.asciidoc b/docs/reference/cluster/nodes-usage.asciidoc deleted file mode 100644 index c7994e32204a6..0000000000000 --- a/docs/reference/cluster/nodes-usage.asciidoc +++ /dev/null @@ -1,115 +0,0 @@ -[[cluster-nodes-usage]] -=== Nodes feature usage API -++++ -Nodes feature usage -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Returns information on the usage of features. - -[[cluster-nodes-usage-api-request]] -==== {api-request-title} - -`GET /_nodes/usage` + - -`GET /_nodes//usage` + - -`GET /_nodes/usage/` + - -`GET /_nodes//usage/` - -[[cluster-nodes-usage-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cluster-nodes-usage-api-desc]] -==== {api-description-title} - -The cluster nodes usage API allows you to retrieve information on the usage -of features for each node. All the nodes selective options are explained -<>. - - -[[cluster-nodes-usage-api-path-params]] -==== {api-path-parms-title} - -``:: - (Optional, string) Limits the information returned to the specific metrics. - A comma-separated list of the following options: -+ --- - `_all`:: - Returns all stats. - - `rest_actions`:: - Returns the REST actions classname with a count of the number of times - that action has been called on the node. --- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=node-id] - - -[[cluster-nodes-usage-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeout-nodes-request] - - -[[cluster-nodes-usage-api-example]] -==== {api-examples-title} - -Rest action example: - -[source,console] --------------------------------------------------- -GET _nodes/usage --------------------------------------------------- -// TEST[setup:node] - -The API returns the following response: - -[source,console-result] --------------------------------------------------- -{ - "_nodes": { - "total": 1, - "successful": 1, - "failed": 0 - }, - "cluster_name": "my_cluster", - "nodes": { - "pQHNt5rXTTWNvUgOrdynKg": { - "timestamp": 1492553961812, <1> - "since": 1492553906606, <2> - "rest_actions": { - "nodes_usage_action": 1, - "create_index_action": 1, - "document_get_action": 1, - "search_action": 19, <3> - "nodes_info_action": 36 - }, - "aggregations": { - ... 
-      }
-    }
-  }
-}
---------------------------------------------------
-// TESTRESPONSE[s/"my_cluster"/$body.cluster_name/]
-// TESTRESPONSE[s/"pQHNt5rXTTWNvUgOrdynKg"/\$node_name/]
-// TESTRESPONSE[s/1492553961812/$body.$_path/]
-// TESTRESPONSE[s/1492553906606/$body.$_path/]
-// TESTRESPONSE[s/"rest_actions": [^}]+}/"rest_actions": $body.$_path/]
-// TESTRESPONSE[s/"aggregations": [^}]+}/"aggregations": $body.$_path/]
-<1> Timestamp for when this nodes usage request was performed.
-<2> Timestamp for when the usage information recording was started. This is
-equivalent to the time that the node was started.
-<3> Search action has been called 19 times for this node.
-
diff --git a/docs/reference/cluster/pending.asciidoc b/docs/reference/cluster/pending.asciidoc
deleted file mode 100644
index f5d42a6df76a6..0000000000000
--- a/docs/reference/cluster/pending.asciidoc
+++ /dev/null
@@ -1,114 +0,0 @@
-[[cluster-pending]]
-=== Pending cluster tasks API
-++++
-Pending cluster tasks
-++++
-
-.New API reference
-[sidebar]
---
-For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs].
---
-
-Returns cluster-level changes that have not yet been executed.
-
-
-[[cluster-pending-api-request]]
-==== {api-request-title}
-
-`GET /_cluster/pending_tasks`
-
-[[cluster-pending-api-prereqs]]
-==== {api-prereq-title}
-
-* If the {es} {security-features} are enabled, you must have the `monitor` or
-`manage` <> to use this API.
-
-[[cluster-pending-api-desc]]
-==== {api-description-title}
-
-The pending cluster tasks API returns a list of any cluster-level changes (e.g.
-create index, update mapping, allocate or fail shard) which have not yet been
-executed.
-
-NOTE: This API returns a list of any pending updates to the cluster state. These are distinct from the tasks reported by the
-<> which include periodic tasks and tasks initiated by the user, such as node stats, search queries, or create
-index requests. However, if a user-initiated task such as a create index command causes a cluster state update, the activity of this task
-might be reported by both the task API and the pending cluster tasks API.
-
-
-[[cluster-pending-api-path-params]]
-==== {api-path-parms-title}
-
-include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=local]
-
-include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout]
-
-
-[[cluster-pending-api-response-body]]
-==== {api-response-body-title}
-
-`tasks`::
-  (object) A list of pending tasks.
-
-`insert_order`::
-  (integer) The number that indicates the order in which the task was inserted
-  into the task queue.
-
-`priority`::
-  (string) The priority of the pending task. The valid priorities in descending priority order are: `IMMEDIATE` > `URGENT` > `HIGH` > `NORMAL` > `LOW` > `LANGUID`.
-
-`source`::
-  (string) A general description of the cluster task that may include a reason
-  and origin.
-
-`executing`::
-  (boolean) Indicates whether the pending task is currently being executed.
-
-`time_in_queue_millis`::
-  (integer) The time, in milliseconds, that the task has been waiting in the
-  queue.
-
-`time_in_queue`::
-  (string) The time that the task has been waiting in the queue.
-
-
-[[cluster-pending-api-example]]
-==== {api-examples-title}
-
-Usually the request will return an empty list as cluster-level changes are fast.
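-For example, the queue can be retrieved with a plain request to the endpoint
-shown above; no query parameters are required for this minimal call:
-
-[source,console]
---------------------------------------------------
-GET /_cluster/pending_tasks
---------------------------------------------------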
-However, if there are tasks queued up, the response will look similar to this:
-
-[source,js]
---------------------------------------------------
-{
-   "tasks": [
-      {
-         "insert_order": 101,
-         "priority": "URGENT",
-         "source": "create-index [foo_9], cause [api]",
-         "executing" : true,
-         "time_in_queue_millis": 86,
-         "time_in_queue": "86ms"
-      },
-      {
-         "insert_order": 46,
-         "priority": "HIGH",
-         "source": "shard-started ([foo_2][1], node[tMTocMvQQgGCkj7QDHl3OA], [P], s[INITIALIZING]), reason [after recovery from shard_store]",
-         "executing" : false,
-         "time_in_queue_millis": 842,
-         "time_in_queue": "842ms"
-      },
-      {
-         "insert_order": 45,
-         "priority": "HIGH",
-         "source": "shard-started ([foo_2][0], node[tMTocMvQQgGCkj7QDHl3OA], [P], s[INITIALIZING]), reason [after recovery from shard_store]",
-         "executing" : false,
-         "time_in_queue_millis": 858,
-         "time_in_queue": "858ms"
-      }
-   ]
-}
---------------------------------------------------
-// NOTCONSOLE
-// We can't test tasks output
diff --git a/docs/reference/cluster/prevalidate-node-removal.asciidoc b/docs/reference/cluster/prevalidate-node-removal.asciidoc
deleted file mode 100644
index 0a09f1adda77c..0000000000000
--- a/docs/reference/cluster/prevalidate-node-removal.asciidoc
+++ /dev/null
@@ -1,146 +0,0 @@
-[[prevalidate-node-removal-api]]
-=== Prevalidate node removal API
-++++
-Prevalidate node removal
-++++
-
-NOTE: {cloud-only}
-
-.New API reference
-[sidebar]
---
-For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs].
---
-
-Prevalidate node removal.
-
-[[prevalidate-node-removal-api-request]]
-==== {api-request-title}
-
-`POST /_internal/prevalidate_node_removal`
-
-[[prevalidate-node-removal-api-prereqs]]
-==== {api-prereq-title}
-
-* If the {es} {security-features} are enabled, you must have the `monitor` or `manage` <> to use this API.
-
-[[prevalidate-node-removal-api-desc]]
-==== {api-description-title}
-
-This API checks whether attempting to remove the specified node(s) from the cluster is likely to succeed. For a cluster with no unassigned shards, removal of any node is considered safe, which means that removal of the node is likely to succeed.
-
-If the cluster has a <>, the API verifies that the removal of the node(s) would not risk removing the last remaining copy of an unassigned shard. If there are red indices in the cluster, the API checks whether the red indices are <> indices, and if not, it sends a request to each of the nodes specified in the API call to verify whether those nodes might contain local shard copies of the red indices that are not searchable snapshot indices. This request is processed on each receiving node by checking whether the node has a shard directory for any of the red index shards.
-
-The response includes the overall safety of the removal of the specified nodes, and a detailed response for each node. The node-specific part of the response also includes more details on why removal of that node might not succeed.
-
-Note that exactly one of the query parameters (`names`, `ids`, or `external_ids`) must be used to specify the set of nodes.
-
-Note that if the prevalidation result for a set of nodes returns `true` (i.e. it is likely to succeed), this does not mean that all those nodes could be successfully removed at once, but rather that removal of each individual node would potentially be successful. The actual node removal could be handled via the <>.
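-As an illustration of the node-selection query parameters described in the next
-section, the same prevalidation can also target nodes by ID rather than by name
-(the IDs below are the placeholder values used in the example response later on
-this page):
-
-[source,console]
---------------------------------------------------
-POST /_internal/prevalidate_node_removal?ids=node1-id,node2-id
---------------------------------------------------
-// TEST[skip:doc tests run with only a single node]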
- -[[prevalidate-node-removal-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] - -`names`:: -(Optional, string) Comma-separated list of node names. - -`ids`:: -(Optional, string) Comma-separated list of node IDs. - -`external_ids`:: -(Optional, string) Comma-separated list of node external IDs. - -[[prevalidate-node-removal-api-response-body]] -==== {api-response-body-title} - -`is_safe`:: - (boolean) Whether the removal of all the provided nodes is safe or not. - -`message`:: - (string) A message providing more detail on why the operation is considered safe or not. - -`nodes`:: - (object) Prevalidation result for the removal of each of the provided nodes. -+ -.Properties of `nodes` -[%collapsible%open] -==== -``:: - (object) Contains information about the removal prevalidation of a specific node. -+ -.Properties of `` -[%collapsible%open] -======= -`id`:: - (string) node ID -`name`:: - (string) node name -`external_id`:: - (string) node external ID -`result`:: - (object) Contains removal prevalidation result of the node. -+ -.Properties of `result` -[%collapsible%open] -======== -`is_safe`:: - (boolean) Whether the removal of the node is considered safe or not. -`reason`:: - (string) A string that specifies the reason why the prevalidation result is considered safe or not. It can be one of the following values: -+ --- - * `no_problems`: The prevalidation did not find any issues that could prevent the node from being safely removed. - * `no_red_shards_except_searchable_snapshots`: The node can be safely removed as all red indices are searchable snapshot indices and therefore removing a node does not risk removing the last copy of that index from the cluster. - * `no_red_shards_on_node`: The node does not contain any copies of the red non-searchable-snapshot index shards. - * `red_shards_on_node`: The node might contain shard copies of some non-searchable-snapshot red indices. The list of the shards that might be on the node are specified in the `message` field. - * `unable_to_verify_red_shards`: Contacting the node failed or timed out. More details is provided in the `message` field. --- -`message`:: - (Optional, string) Detailed information about the removal prevalidation result. -======== -======= -==== - -[[prevalidate-node-removal-api-example]] -==== {api-examples-title} - -This example validates whether it is safe to remove the nodes `node1` and `node2`. The response indicates that it is safe to remove `node1`, but it might not be safe to remove `node2` as it might contain copies of the specified red shards. Therefore, the overall prevalidation of the removal of the two nodes returns `false`. 
- -[source,console] --------------------------------------------------- -POST /_internal/prevalidate_node_removal?names=node1,node2 --------------------------------------------------- -// TEST[skip:doc tests run with only a single node] - -The API returns the following response: - -[source,console-result] --------------------------------------------------- -{ - "is_safe": false, - "message": "removal of the following nodes might not be safe: [node2-id]", - "nodes": [ - { - "id": "node1-id", - "name" : "node1", - "external_id" : "node1-externalId", - "result" : { - "is_safe": true, - "reason": "no_red_shards_on_node", - "message": "" - } - }, - { - "id": "node2-id", - "name" : "node2", - "external_id" : "node2-externalId", - "result" : { - "is_safe": false, - "reason": "red_shards_on_node", - "message": "node contains copies of the following red shards: [[indexName][0]]" - } - } - ] -} --------------------------------------------------- diff --git a/docs/reference/cluster/remote-info.asciidoc b/docs/reference/cluster/remote-info.asciidoc deleted file mode 100644 index e91ccc4d8f4a1..0000000000000 --- a/docs/reference/cluster/remote-info.asciidoc +++ /dev/null @@ -1,90 +0,0 @@ -[[cluster-remote-info]] -=== Remote cluster info API -++++ -Remote cluster info -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Returns configured remote cluster information. - -[[cluster-remote-info-api-request]] -==== {api-request-title} - -`GET /_remote/info` - -[[cluster-remote-info-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cluster-remote-info-api-desc]] -==== {api-description-title} - -The cluster remote info API allows you to retrieve information about configured -remote clusters. It returns connection and endpoint information keyed -by the configured remote cluster alias. - -TIP: This API returns information that reflects current state on the local cluster. -The `connected` field does not necessarily reflect whether a remote cluster is -down or unavailable, only whether there is currently an open connection to it. -Elasticsearch does not spontaneously try to reconnect to a disconnected remote -cluster. To trigger a reconnection, attempt a <>, -<>, or try the -<> endpoint. - - -[[cluster-remote-info-api-response-body]] -==== {api-response-body-title} - -`mode`:: - Connection mode for the remote cluster. Returned values are `sniff` and - `proxy`. - -`connected`:: - True if there is at least one open connection to the remote cluster. When - false, it means that the cluster no longer has an open connection to the - remote cluster. It does not necessarily mean that the remote cluster is - down or unavailable, just that at some point a connection was lost. - -`initial_connect_timeout`:: - The initial connect timeout for remote cluster connections. - -[[skip-unavailable]] -`skip_unavailable`:: -Whether a {ccs} skips the remote cluster if its nodes are unavailable during the -search. If `true`, a {ccs} also ignores errors returned by the remote cluster. -Refer to <>. - -`seeds`:: - Initial seed transport addresses of the remote cluster when sniff mode is - configured. - -`num_nodes_connected`:: - Number of connected nodes in the remote cluster when sniff mode is - configured. - -`max_connections_per_cluster`:: - Maximum number of connections maintained for the remote cluster when sniff - mode is configured. 
- -`proxy_address`:: - Address for remote connections when proxy mode is configured. - -`num_proxy_sockets_connected`:: - Number of open socket connections to the remote cluster when proxy mode - is configured. - -`max_proxy_socket_connections`:: - The maximum number of socket connections to the remote cluster when proxy - mode is configured. - -`cluster_credentials`:: -This field presents and has value of `::es_redacted::` only when the -<>. -Otherwise, the field is not present. diff --git a/docs/reference/cluster/reroute.asciidoc b/docs/reference/cluster/reroute.asciidoc deleted file mode 100644 index b393a9a68d2b2..0000000000000 --- a/docs/reference/cluster/reroute.asciidoc +++ /dev/null @@ -1,219 +0,0 @@ -[[cluster-reroute]] -=== Cluster reroute API -++++ -Cluster reroute -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Changes the allocation of shards in a cluster. - -[[cluster-reroute-api-request]] -==== {api-request-title} - -`POST /_cluster/reroute` - -[[cluster-reroute-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `manage` -<> to use this API. - -[[cluster-reroute-api-desc]] -==== {api-description-title} - -The reroute command allows for manual changes to the allocation of individual -shards in the cluster. For example, a shard can be moved from one node to -another explicitly, an allocation can be cancelled, and an unassigned shard can -be explicitly allocated to a specific node. - -It is important to note that after processing any reroute commands {es} will -perform rebalancing as normal (respecting the values of settings such as -`cluster.routing.rebalance.enable`) in order to remain in a balanced state. For -example, if the requested allocation includes moving a shard from `node1` to -`node2` then this may cause a shard to be moved from `node2` back to `node1` to -even things out. - -The cluster can be set to disable allocations using the -`cluster.routing.allocation.enable` setting. If allocations are disabled then -the only allocations that will be performed are explicit ones given using the -`reroute` command, and consequent allocations due to rebalancing. - -It is possible to run `reroute` commands in "dry run" mode by using the -`?dry_run` URI query parameter, or by passing `"dry_run": true` in the request -body. This will calculate the result of applying the commands to the current -cluster state, and return the resulting cluster state after the commands (and -re-balancing) has been applied, but will not actually perform the requested -changes. - -If the `?explain` URI query parameter is included then a detailed explanation -of why the commands could or could not be executed is included in the response. - -The cluster will attempt to allocate a shard a maximum of -`index.allocation.max_retries` times in a row (defaults to `5`), before giving -up and leaving the shard unallocated. This scenario can be caused by -structural problems such as having an analyzer which refers to a stopwords -file which doesn't exist on all nodes. - -Once the problem has been corrected, allocation can be manually retried by -calling the `reroute` API with the `?retry_failed` URI -query parameter, which will attempt a single retry round for these shards. 
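-For example, the following request asks for a single retry round for any shards
-that have exceeded the allocation retry limit, without issuing any explicit
-commands. Adding `metric=none` keeps the cluster state out of the response, as
-described in the query parameters below:
-
-[source,console]
---------------------------------------------------
-POST /_cluster/reroute?metric=none&retry_failed=true
---------------------------------------------------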
- - -[[cluster-reroute-api-query-params]] -[role="child_attributes"] -==== {api-query-parms-title} - -`dry_run`:: - (Optional, Boolean) If `true`, then the request simulates the operation only - and returns the resulting state. - -`explain`:: - (Optional, Boolean) If `true`, then the response contains an explanation of - why the commands can or cannot be executed. - -`metric`:: - (Optional, string) Limits the information returned to the specified metrics. - All options except `none` are deprecated and should be avoided for this parameter. - Defaults to all but metadata. The following options are available: - -+ -.Options for `metric` -[%collapsible%open] -====== -`_all`:: - Shows all metrics. - -`blocks`:: - Shows the `blocks` part of the response. - -`master_node`:: - Shows the elected `master_node` part of the response. - -`metadata`:: - Shows the `metadata` part of the response. If you supply a comma separated - list of indices, the returned output will only contain metadata for these - indices. - -`nodes`:: - Shows the `nodes` part of the response. - -`none`:: - Excludes the entire `state` field from the response. - -`routing_table`:: - Shows the `routing_table` part of the response. - -`version`:: - Shows the cluster state version. -====== - - -`retry_failed`:: - (Optional, Boolean) If `true`, then retries allocation of shards that are - blocked due to too many subsequent allocation failures. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] - -[role="child_attributes"] -[[cluster-reroute-api-request-body]] -==== {api-request-body-title} - -`commands`:: - (Required, array of objects) Defines the commands to perform. Supported commands are: - -+ -.Properties of `commands` -[%collapsible%open] -====== - -`move`:: - Move a started shard from one node to another node. Accepts `index` and - `shard` for index name and shard number, `from_node` for the node to move - the shard from, and `to_node` for the node to move the shard to. - -`cancel`:: - Cancel allocation of a shard (or recovery). Accepts `index` and `shard` for - index name and shard number, and `node` for the node to cancel the shard - allocation on. This can be used to force resynchronization of existing - replicas from the primary shard by cancelling them and allowing them to be - reinitialized through the standard recovery process. By default only - replica shard allocations can be cancelled. If it is necessary to cancel - the allocation of a primary shard then the `allow_primary` flag must also - be included in the request. - -`allocate_replica`:: - Allocate an unassigned replica shard to a node. Accepts `index` and `shard` - for index name and shard number, and `node` to allocate the shard to. Takes - <> into account. - -Two more commands are available that allow the allocation of a primary shard to -a node. These commands should however be used with extreme care, as primary -shard allocation is usually fully automatically handled by {es}. Reasons why a -primary shard cannot be automatically allocated include the -following: - -- A new index was created but there is no node which satisfies the allocation - deciders. -- An up-to-date shard copy of the data cannot be found on the current data - nodes in the cluster. To prevent data loss, the system does not automatically -promote a stale shard copy to primary. - -The following two commands are dangerous and may result in data loss. 
They are -meant to be used in cases where the original data can not be recovered and the -cluster administrator accepts the loss. If you have suffered a temporary issue -that can be fixed, please see the `retry_failed` flag described above. To -emphasise: if these commands are performed and then a node joins the cluster -that holds a copy of the affected shard then the copy on the newly-joined node -will be deleted or overwritten. - -`allocate_stale_primary`:: - Allocate a primary shard to a node that holds a stale copy. Accepts the - `index` and `shard` for index name and shard number, and `node` to allocate - the shard to. Using this command may lead to data loss for the provided - shard id. If a node which has the good copy of the data rejoins the cluster - later on, that data will be deleted or overwritten with the data of the - stale copy that was forcefully allocated with this command. To ensure that - these implications are well-understood, this command requires the flag - `accept_data_loss` to be explicitly set to `true`. - -`allocate_empty_primary`:: - Allocate an empty primary shard to a node. Accepts the `index` and `shard` - for index name and shard number, and `node` to allocate the shard to. Using - this command leads to a complete loss of all data that was indexed into - this shard, if it was previously started. If a node which has a copy of the - data rejoins the cluster later on, that data will be deleted. To ensure - that these implications are well-understood, this command requires the flag - `accept_data_loss` to be explicitly set to `true`. -====== - -[[cluster-reroute-api-example]] -==== {api-examples-title} - -This is a short example of a simple reroute API call: - -[source,console] --------------------------------------------------- -POST /_cluster/reroute -{ - "commands": [ - { - "move": { - "index": "test", "shard": 0, - "from_node": "node1", "to_node": "node2" - } - }, - { - "allocate_replica": { - "index": "test", "shard": 1, - "node": "node3" - } - } - ] -} --------------------------------------------------- -// TEST[skip:doc tests run with only a single node] diff --git a/docs/reference/cluster/state.asciidoc b/docs/reference/cluster/state.asciidoc deleted file mode 100644 index bf2863018893a..0000000000000 --- a/docs/reference/cluster/state.asciidoc +++ /dev/null @@ -1,166 +0,0 @@ -[[cluster-state]] -=== Cluster state API -++++ -Cluster state -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Returns an internal representation of the cluster state for debugging or -diagnostic purposes. - -[[cluster-state-api-request]] -==== {api-request-title} - -`GET /_cluster/state//` - -[[cluster-state-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[cluster-state-api-desc]] -==== {api-description-title} - -The _cluster state_ is an internal data structure which keeps track of a -variety of information needed by every node, including: - -* The identity and attributes of the other nodes in the cluster - -* Cluster-wide settings - -* Index metadata, including the mapping and settings for each index - -* The location and status of every shard copy in the cluster - -The elected master node ensures that every node in the cluster has a copy of -the same cluster state. 
The cluster state API lets you retrieve a -representation of this internal state for debugging or diagnostic purposes. You -may need to consult the {es} source code to determine the precise meaning of -the response. - -By default the cluster state API will route requests to the elected master node -since this node is the authoritative source of cluster states. You can also -retrieve the cluster state held on the node handling the API request by adding -the query parameter `?local=true`. - -{es} may need to expend significant effort to compute a response to this API in -larger clusters, and the response may comprise a very large quantity of data. -If you use this API repeatedly, your cluster may become unstable. - -[[cluster-state-api-unstable-warning]] -WARNING: The response is a representation of an internal data structure. Its -format is not subject to the same compatibility guarantees as other more stable -APIs and may change from version to version. **Do not query this API using -external monitoring tools.** Instead, obtain the information you require using -other more stable <>. - -[[cluster-state-api-path-params]] -==== {api-path-parms-title} - -The cluster state can sometimes be very large, and {es} may consume significant -resources while computing a response to this API. To reduce the size of the -response, you can request only the part of the cluster state in which you are -interested: - -``:: - (Optional, string) A comma-separated list of the following options: -+ --- - `_all`:: - Shows all metrics. - - `blocks`:: - Shows the `blocks` part of the response. - - `master_node`:: - Shows the `master_node` part of the response. - - `metadata`:: - Shows the `metadata` part of the response. If you supply a comma separated - list of indices, the returned output will only contain metadata for these - indices. - - `nodes`:: - Shows the `nodes` part of the response. - - `routing_nodes`:: - Shows the `routing_nodes` part of the response. - - `routing_table`:: - Shows the `routing_table` part of the response. If you supply a comma - separated list of indices, the returned output will only contain the - routing table for these indices. - - `version`:: - Shows the cluster state version. --- - -``:: -(Optional, string) Comma-separated list of data streams, indices, and aliases -used to limit the request. Supports wildcards (`*`). To target all data streams -and indices, omit this parameter or use `*` or `_all`. - -[[cluster-state-api-query-params]] -==== {api-query-parms-title} - -`allow_no_indices`:: - (Optional, Boolean) If `true`, the wildcard indices expression that resolves - into no concrete indices will be ignored. (This includes `_all` string or - when no indices have been specified). -+ -Defaults to `true`. - -`expand_wildcards`:: - (Optional, string) Whether to expand wildcard expression to concrete indices - that are open, closed or both. Available options: `open`, `closed`, `none`, - `all`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=flat-settings] - -`ignore_unavailable`:: - (Optional, Boolean) If `true`, unavailable indices (missing or closed) will - be ignored. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=local] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -`wait_for_metadata_version`:: - (Optional, integer) Waits for the metadata version to be equal or greater - than the specified metadata version. 
- -`wait_for_timeout`:: - (Optional, <>) Specifies the maximum time to wait - for wait_for_metadata_version before timing out. - - -[[cluster-state-api-example]] -==== {api-examples-title} - -The following example returns only `metadata` and `routing_table` data for the -`foo` and `bar` data streams or indices: - -[source,console] --------------------------------------------------- -GET /_cluster/state/metadata,routing_table/foo,bar --------------------------------------------------- - -The next example returns all available metadata for `foo` and `bar`: - -[source,console] --------------------------------------------------- -GET /_cluster/state/_all/foo,bar --------------------------------------------------- - -This example returns only the `blocks` metadata: - -[source,console] --------------------------------------------------- -GET /_cluster/state/blocks --------------------------------------------------- diff --git a/docs/reference/cluster/stats.asciidoc b/docs/reference/cluster/stats.asciidoc deleted file mode 100644 index f078fd2b7f2ee..0000000000000 --- a/docs/reference/cluster/stats.asciidoc +++ /dev/null @@ -1,1943 +0,0 @@ -[[cluster-stats]] -=== Cluster stats API - -++++ -Cluster stats -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Returns cluster statistics. - -[[cluster-stats-api-request]] -==== {api-request-title} - -`GET /_cluster/stats` + - -`GET /_cluster/stats/nodes/` - -[[cluster-stats-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. -[[cluster-stats-api-desc]] -==== {api-description-title} - -The Cluster Stats API allows to retrieve statistics from a cluster wide perspective. -The API returns basic index metrics (shard numbers, store size, memory usage) and information about the current nodes that form the cluster (number, roles, os, jvm versions, memory usage, cpu and installed plugins). - -[[cluster-stats-api-path-params]] -==== {api-path-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=node-filter] - -[[cluster-stats-api-query-params]] -==== {api-query-parms-title} - -`timeout`:: -(Optional, <>) Period to wait for each node to respond. -If a node does not respond before its timeout expires, the response does not include its stats. -However, timed out nodes are included in the response's `_nodes.failed` property. -Defaults to no timeout. - -`include_remotes`:: -(Optional, Boolean) If `true`, includes remote cluster information in the response. -Defaults to `false`, so no remote cluster information is returned. - -[role="child_attributes"] -[[cluster-stats-api-response-body]] -==== {api-response-body-title} - -`_nodes`:: -(object) Contains statistics about the number of nodes selected by the request's -<>. -+ -.Properties of `_nodes` -[%collapsible%open] -==== -`total`:: -(integer) Total number of nodes selected by the request. - -`successful`:: -(integer) Number of nodes that responded successfully to the request. - -`failed`:: -(integer) Number of nodes that rejected the request or failed to respond. -If this value is not `0`, a reason for the rejection or failure is included in the response. - -==== - -`cluster_name`:: -(string) Name of the cluster, based on the <> setting. - -`cluster_uuid`:: -(string) Unique identifier for the cluster. 
- -`timestamp`:: -(integer) -{wikipedia}/Unix_time[Unix timestamp], in milliseconds, of the last time the cluster statistics were refreshed. - -`status`:: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cluster-health-status] -+ -See <>. - -[[cluster-stats-api-response-body-indices]] -`indices`:: -(object) Contains statistics about indices with shards assigned to selected nodes. -+ -.Properties of `indices` -[%collapsible%open] -==== - -`count`:: -(integer) Total number of indices with shards assigned to selected nodes. - -`shards`:: -(object) Contains statistics about shards assigned to selected nodes. -+ -.Properties of `shards` -[%collapsible%open] -===== -`total`:: -(integer) Total number of shards assigned to selected nodes. - -`primaries`:: -(integer) Number of primary shards assigned to selected nodes. - -`replication`:: -(float) Ratio of replica shards to primary shards across all selected nodes. - -`index`:: -(object) Contains statistics about shards assigned to selected nodes. -+ -.Properties of `index` -[%collapsible%open] -====== -`shards`:: -(object) Contains statistics about the number of shards assigned to selected nodes. -+ -.Properties of `shards` -[%collapsible%open] -======= -`min`:: -(integer) Minimum number of shards in an index, counting only shards assigned to selected nodes. - -`max`:: -(integer) Maximum number of shards in an index, counting only shards assigned to selected nodes. - -`avg`:: -(float) Mean number of shards in an index, counting only shards assigned to selected nodes. - -======= - -`primaries`:: -(object) Contains statistics about the number of primary shards assigned to selected nodes. -+ -.Properties of `primaries` -[%collapsible%open] -======= - -`min`:: -(integer) Minimum number of primary shards in an index, counting only shards assigned to selected nodes. - -`max`:: -(integer) Maximum number of primary shards in an index, counting only shards assigned to selected nodes. - -`avg`:: -(float) Mean number of primary shards in an index, counting only shards assigned to selected nodes. - -======= - -`replication`:: -(object) Contains statistics about the number of replication shards assigned to selected nodes. -+ -.Properties of `replication` -[%collapsible%open] -======= - -`min`:: -(float) Minimum replication factor in an index, counting only shards assigned to selected nodes. - -`max`:: -(float) Maximum replication factor in an index, counting only shards assigned to selected nodes. - -`avg`:: -(float) Mean replication factor in an index, counting only shards assigned to selected nodes. - -======= -====== -===== - -`docs`:: -(object) Contains counts for documents in selected nodes. -+ -.Properties of `docs` -[%collapsible%open] -===== - -`count`:: -(integer) Total number of non-deleted documents across all primary shards assigned to selected nodes. -+ -This number is based on documents in Lucene segments and may include documents from nested fields. - -`deleted`:: -(integer) Total number of deleted documents across all primary shards assigned to selected nodes. -+ -This number is based on documents in Lucene segments. {es} reclaims the disk space of deleted Lucene documents when a segment is merged. - -`total_size_in_bytes`:: -(integer) Total size in bytes across all primary shards assigned to selected nodes. - -`total_size`:: -(string) Total size across all primary shards assigned to selected nodes, as a human-readable string. - -===== - -`store`:: -(object) Contains statistics about the size of shards assigned to selected nodes. 
-+ -.Properties of `store` -[%collapsible%open] -===== - -`size`:: -(<>) Total size of all shards assigned to selected nodes. - -`size_in_bytes`:: -(integer) Total size, in bytes, of all shards assigned to selected nodes. - -`total_data_set_size`:: -(<>) Total data set size of all shards assigned to selected nodes. -This includes the size of shards not stored fully on the nodes, such as the cache for <>. - -`total_data_set_size_in_bytes`:: -(integer) Total data set size, in bytes, of all shards assigned to selected nodes. -This includes the size of shards not stored fully on the nodes, such as the cache for <>. - -`reserved`:: -(<>) A prediction of how much larger the shard stores will eventually grow due to ongoing peer recoveries, restoring snapshots, and similar activities. - -`reserved_in_bytes`:: -(integer) A prediction, in bytes, of how much larger the shard stores will eventually grow due to ongoing peer recoveries, restoring snapshots, and similar activities. - -===== - -`fielddata`:: -(object) Contains statistics about the <> of selected nodes. -+ -.Properties of `fielddata` -[%collapsible%open] -===== - -`memory_size`:: -(<>) Total amount of memory used for the field data cache across all shards assigned to selected nodes. - -`memory_size_in_bytes`:: -(integer) Total amount, in bytes, of memory used for the field data cache across all shards assigned to selected nodes. - -`evictions`:: -(integer) Total number of evictions from the field data cache across all shards assigned to selected nodes. - -`global_ordinals.build_time`:: -(<>) The total time spent building global ordinals for all fields. - -`global_ordinals.build_time_in_millis`:: -(integer) The total time, in milliseconds, spent building global ordinals for all fields. - -`global_ordinals.fields.[field-name].build_time`:: -(<>) The total time spent building global ordinals for field with specified name. - -`global_ordinals.fields.[field-name].build_time_in_millis`:: -(integer) The total time, in milliseconds, spent building global ordinals for field with specified name. - -`global_ordinals.fields.[field-name].shard_max_value_count`:: -(long) The total time spent building global ordinals for field with specified name. - -===== - -`query_cache`:: -(object) Contains statistics about the query cache of selected nodes. -+ -.Properties of `query_cache` -[%collapsible%open] -===== - -`memory_size`:: -(<>) Total amount of memory used for the query cache across all shards assigned to selected nodes. - -`memory_size_in_bytes`:: -(integer) Total amount, in bytes, of memory used for the query cache across all shards assigned to selected nodes. - -`total_count`:: -(integer) Total count of hits and misses in the query cache across all shards assigned to selected nodes. - -`hit_count`:: -(integer) Total count of query cache hits across all shards assigned to selected nodes. - -`miss_count`:: -(integer) Total count of query cache misses across all shards assigned to selected nodes. - -`cache_size`:: -(integer) Total number of entries currently in the query cache across all shards assigned to selected nodes. - -`cache_count`:: -(integer) Total number of entries added to the query cache across all shards assigned to selected nodes. -This number includes current and evicted entries. - -`evictions`:: -(integer) Total number of query cache evictions across all shards assigned to selected nodes. - -===== - -`completion`:: -(object) Contains statistics about memory used for completion in selected nodes. 
-+ -.Properties of `completion` -[%collapsible%open] -===== - -`size`:: -(<>) Total amount of memory used for completion across all shards assigned to selected nodes. - -`size_in_bytes`:: -(integer) Total amount, in bytes, of memory used for completion across all shards assigned to selected nodes. - -===== - -`segments`:: -(object) Contains statistics about segments in selected nodes. -+ -.Properties of `segments` -[%collapsible%open] -===== - -`count`:: -(integer) Total number of segments across all shards assigned to selected nodes. - -`memory`:: -(<>) Total amount of memory used for segments across all shards assigned to selected nodes. - -`memory_in_bytes`:: -(integer) Total amount, in bytes, of memory used for segments across all shards assigned to selected nodes. - -`terms_memory`:: -(<>) Total amount of memory used for terms across all shards assigned to selected nodes. - -`terms_memory_in_bytes`:: -(integer) Total amount, in bytes, of memory used for terms across all shards assigned to selected nodes. - -`stored_fields_memory`:: -(<>) Total amount of memory used for stored fields across all shards assigned to selected nodes. - -`stored_fields_memory_in_bytes`:: -(integer) Total amount, in bytes, of memory used for stored fields across all shards assigned to selected nodes. - -`term_vectors_memory`:: -(<>) Total amount of memory used for term vectors across all shards assigned to selected nodes. - -`term_vectors_memory_in_bytes`:: -(integer) Total amount, in bytes, of memory used for term vectors across all shards assigned to selected nodes. - -`norms_memory`:: -(<>) Total amount of memory used for normalization factors across all shards assigned to selected nodes. - -`norms_memory_in_bytes`:: -(integer) Total amount, in bytes, of memory used for normalization factors across all shards assigned to selected nodes. - -`points_memory`:: -(<>) Total amount of memory used for points across all shards assigned to selected nodes. - -`points_memory_in_bytes`:: -(integer) Total amount, in bytes, of memory used for points across all shards assigned to selected nodes. - -`doc_values_memory`:: -(<>) Total amount of memory used for doc values across all shards assigned to selected nodes. - -`doc_values_memory_in_bytes`:: -(integer) Total amount, in bytes, of memory used for doc values across all shards assigned to selected nodes. - -`index_writer_memory`:: -(<>) Total amount of memory used by all index writers across all shards assigned to selected nodes. - -`index_writer_memory_in_bytes`:: -(integer) Total amount, in bytes, of memory used by all index writers across all shards assigned to selected nodes. - -`version_map_memory`:: -(<>) Total amount of memory used by all version maps across all shards assigned to selected nodes. - -`version_map_memory_in_bytes`:: -(integer) Total amount, in bytes, of memory used by all version maps across all shards assigned to selected nodes. - -`fixed_bit_set`:: -(<>) Total amount of memory used by fixed bit sets across all shards assigned to selected nodes. -+ -Fixed bit sets are used for nested object field types and type filters for <> fields. - -`fixed_bit_set_memory_in_bytes`:: -(integer) Total amount of memory, in bytes, used by fixed bit sets across all shards assigned to selected nodes. - -`max_unsafe_auto_id_timestamp`:: -(integer) -{wikipedia}/Unix_time[Unix timestamp], in milliseconds, of the most recently retried indexing request. - -`file_sizes`:: -(object) This object is not populated by the cluster stats API. 
-+ -To get information on segment files, use the <>. - -===== - -`mappings`:: -(object) Contains statistics about <> in selected nodes. -+ -.Properties of `mappings` -[%collapsible%open] -===== - -`total_field_count`:: -(integer) Total number of fields in all non-system indices. - -`total_deduplicated_field_count`:: -(integer) Total number of fields in all non-system indices, accounting for mapping deduplication. - -`total_deduplicated_mapping_size`:: -(<>) Total size of all mappings after deduplication and compression. - -`total_deduplicated_mapping_size_in_bytes`:: -(integer) Total size of all mappings, in bytes, after deduplication and compression. - -`field_types`:: -(array of objects) Contains statistics about <> used in selected nodes. -+ -.Properties of `field_types` objects -[%collapsible%open] -====== - -`name`:: -(string) Field data type used in selected nodes. - -`count`:: -(integer) Number of fields mapped to the field data type in selected nodes. - -`index_count`:: -(integer) Number of indices containing a mapping of the field data type in selected nodes. - -`indexed_vector_count`:: -(integer) For dense_vector field types, number of indexed vector types in selected nodes. - -`indexed_vector_dim_min`:: -(integer) For dense_vector field types, the minimum dimension of all indexed vector types in selected nodes. - -`indexed_vector_dim_max`:: -(integer) For dense_vector field types, the maximum dimension of all indexed vector types in selected nodes. - -`vector_index_type_count`:: -(object) For dense_vector field types, the number of indexed vector types by index type in selected nodes. - -`vector_similarity_type_count`:: -(object) For dense_vector field types, the number of vector types by similarity type in selected nodes. - -`vector_element_type_count`:: -(object) For dense_vector field types, the number of vector types by element type in selected nodes. - -`script_count`:: -(integer) Number of fields that declare a script. - -`lang`:: -(array of strings) Script languages used for the optional scripts - -`lines_max`:: -(integer) Maximum number of lines for a single field script - -`lines_total`:: -(integer) Total number of lines for the scripts - -`chars_max`:: -(integer) Maximum number of characters for a single field script - -`chars_total`:: -(integer) Total number of characters for the scripts - -`source_max`:: -(integer) Maximum number of accesses to _source for a single field script - -`source_total`:: -(integer) Total number of accesses to _source for the scripts - -`doc_max`:: -(integer) Maximum number of accesses to doc_values for a single field script - -`doc_total`:: -(integer) Total number of accesses to doc_values for the scripts - -====== - -`runtime_field_types`:: -(array of objects) Contains statistics about <> used in selected nodes. -+ -.Properties of `runtime_field_types` objects -[%collapsible%open] -====== - -`name`:: -(string) Field data type used in selected nodes. - -`count`:: -(integer) Number of runtime fields mapped to the field data type in selected nodes. - -`index_count`:: -(integer) Number of indices containing a mapping of the runtime field data type in selected nodes. - -`scriptless_count`:: -(integer) Number of runtime fields that don't declare a script. - -`shadowed_count`:: -(integer) Number of runtime fields that shadow an indexed field. 
- -`lang`:: -(array of strings) Script languages used for the runtime fields scripts - -`lines_max`:: -(integer) Maximum number of lines for a single runtime field script - -`lines_total`:: -(integer) Total number of lines for the scripts that define the current runtime field data type - -`chars_max`:: -(integer) Maximum number of characters for a single runtime field script - -`chars_total`:: -(integer) Total number of characters for the scripts that define the current runtime field data type - -`source_max`:: -(integer) Maximum number of accesses to _source for a single runtime field script - -`source_total`:: -(integer) Total number of accesses to _source for the scripts that define the current runtime field data type - -`doc_max`:: -(integer) Maximum number of accesses to doc_values for a single runtime field script - -`doc_total`:: -(integer) Total number of accesses to doc_values for the scripts that define the current runtime field data type - -====== -===== - -`analysis`:: -(object) Contains statistics about <> -used in selected nodes. -+ -.Properties of `analysis` -[%collapsible%open] -===== - -`char_filter_types`:: -(array of objects) Contains statistics about <> types used in selected nodes. -+ -.Properties of `char_filter_types` objects -[%collapsible%open] -====== - -`name`:: -(string) Character filter type used in selected nodes. - -`count`:: -(integer) Number of analyzers or normalizers using the character filter type in selected nodes. - -`index_count`:: -(integer) Number of indices the character filter type in selected nodes. - -====== - -`tokenizer_types`:: -(array of objects) Contains statistics about <> types used in selected nodes. -+ -.Properties of `tokenizer_types` objects -[%collapsible%open] -====== - -`name`:: -(string) Tokenizer type used in selected nodes. - -`count`:: -(integer) Number of analyzers or normalizers using the tokenizer type in selected nodes. - -`index_count`:: -(integer) Number of indices using the tokenizer type in selected nodes. - -====== - -`filter_types`:: -(array of objects) Contains statistics about <> types used in selected nodes. -+ -.Properties of `filter_types` objects -[%collapsible%open] -====== - -`name`:: -(string) Token filter type used in selected nodes. - -`count`:: -(integer) Number of analyzers or normalizers using the token filter type in selected nodes. - -`index_count`:: -(integer) Number of indices using the token filter type in selected nodes. - -====== - -`analyzer_types`:: -(array of objects) Contains statistics about <> types used in selected nodes. -+ -.Properties of `analyzer_types` objects -[%collapsible%open] -====== - -`name`:: -(string) Analyzer type used in selected nodes. - -`count`:: -(integer) Occurrences of the analyzer type in selected nodes. - -`index_count`:: -(integer) Number of indices using the analyzer type in selected nodes. - -====== - -`built_in_char_filters`:: -(array of objects) Contains statistics about built-in <> -used in selected nodes. -+ -.Properties of `built_in_char_filters` objects -[%collapsible%open] -====== - -`name`:: -(string) Built-in character filter used in selected nodes. - -`count`:: -(integer) Number of analyzers or normalizers using the built-in character filter in selected nodes. - -`index_count`:: -(integer) Number of indices using the built-in character filter in selected nodes. - -====== - -`built_in_tokenizers`:: -(array of objects) Contains statistics about built-in <> used in selected nodes. 
-+ -.Properties of `built_in_tokenizers` objects -[%collapsible%open] -====== - -`name`:: -(string) Built-in tokenizer used in selected nodes. - -`count`:: -(integer) Number of analyzers or normalizers using the built-in tokenizer in selected nodes. - -`index_count`:: -(integer) Number of indices using the built-in tokenizer in selected nodes. - -====== - -`built_in_filters`:: -(array of objects) Contains statistics about built-in <> used in selected nodes. -+ -.Properties of `built_in_filters` objects -[%collapsible%open] -====== - -`name`:: -(string) Built-in token filter used in selected nodes. - -`count`:: -(integer) Number of analyzers or normalizers using the built-in token filter in selected nodes. - -`index_count`:: -(integer) Number of indices using the built-in token filter in selected nodes. - -====== - -`built_in_analyzers`:: -(array of objects) Contains statistics about built-in <> used in selected nodes. -+ -.Properties of `built_in_analyzers` objects -[%collapsible%open] -====== - -`name`:: -(string) Built-in analyzer used in selected nodes. - -`count`:: -(integer) Occurrences of the built-in analyzer in selected nodes. - -`index_count`:: -(integer) Number of indices using the built-in analyzer in selected nodes. - -====== - -`synonyms`:: -(object) Contains statistics about synonyms defined in <> and <> token filters configuration. - -+ -.Properties of `synonyms` objects -[%collapsible%open] -====== - -`inline`:: -(object) Inline synonyms defined using `synonyms` configuration in synonym or synonym graph token filters. - -+ -.Properties of `inline` objects -[%collapsible%open] -======= - -`count`:: -(integer) Occurrences of inline synonyms configuration in selected nodes. -Each inline synonyms configuration will be counted separately, regardless of the synonyms defined. -Two synonyms configurations with the same synonyms will count as separate ocurrences. - -`index_count`:: -(integer) Number of indices that use inline synonyms configuration for synonyms token filters. - -======= - -`paths`:: -(object) Contains statistics about synonym files defined as `synonyms_path` in <> and <> token filters configuration. - -+ -.Properties of `paths` objects -[%collapsible%open] -======= - -`count`:: -(integer) Occurrences of unique synonym paths in selected nodes. - -`index_count`:: -(integer) Number of indices that use `synonyms_path` configuration for synonyms token filters. - -======= - -`sets`:: -(object) Contains statistics about synonyms sets configured as `synonyms_set` in <> and <> token filters configuration. - -+ -.Properties of `sets` objects -[%collapsible%open] -======= - -`count`:: -(integer) Occurrences of unique synonyms sets in selected nodes. - -`index_count`:: -(integer) Number of indices that use `synonyms_set` configuration for synonyms token filters. - -======= - -====== -===== - -`search`:: -(object) Contains usage statistics about search requests submitted to selected nodes that acted as coordinator during the search execution. -Search requests are tracked when they are successfully parsed, regardless of their results: -requests that yield errors after parsing contribute to the usage stats, as well as requests that don't access any data. - -+ -.Properties of `search` objects -[%collapsible%open] -===== - -`total`:: -(integer) Total number of incoming search requests. -Search requests that don't specify a request body are not counted. - -`queries`:: -(object) Query types used in selected nodes. 
-For each query, name and number of times it's been used within the `query` or `post_filter` section is reported. -Queries are counted once per search request, meaning that if the same query type is used multiple times in the same search request, its counter will be incremented by 1 rather than by the number of times it's been used in that individual search request. - -`sections`:: -(object) Search sections used in selected nodes. -For each section, name and number of times it's been used is reported. - -`retrievers`:: -(object) Retriever types that were used in selected nodes. -For each retriever, name and number of times it's been used is reported. - -===== - -`dense_vector`:: -(object) Contains statistics about indexed dense vector used in selected nodes. -+ -.Properties of `dense_vector` -[%collapsible%open] -===== - -`value_count`:: -(integer) Total number of dense vector indexed in selected nodes. - -===== - -`sparse_vector`:: -(object) Contains statistics about indexed sparse vector used in selected nodes. -+ -.Properties of `sparse_vector` -[%collapsible%open] -===== - -`value_count`:: -(integer) Total number of sparse vectors indexed across all primary shards assigned to selected nodes. - -===== -==== - -[[cluster-stats-api-response-body-nodes]] -`nodes`:: -(object) Contains statistics about nodes selected by the request's <>. -+ -.Properties of `nodes` -[%collapsible%open] -==== - -`count`:: -(object) Contains counts for nodes selected by the request's <>. -+ -.Properties of `count` -[%collapsible%open] -===== - -`total`:: -(integer) Total number of selected nodes. - -`coordinating_only`:: -(integer) Number of selected nodes without a <>. -These nodes are considered <> nodes. - -``:: -(integer) Number of selected nodes with the role. -For a list of roles, see -<>. - -===== - -`versions`:: -(array of strings) Array of {es} versions used on selected nodes. - -`os`:: -(object) Contains statistics about the operating systems used by selected nodes. -+ -.Properties of `os` -[%collapsible%open] -===== - -`available_processors`:: -(integer) Number of processors available to JVM across all selected nodes. - -`allocated_processors`:: -(integer) Number of processors used to calculate thread pool size across all selected nodes. -+ -This number can be set with the `processors` setting of a node and defaults to the number of processors reported by the OS. -In both cases, this number will never be larger than `32`. - -`names`:: -(array of objects) Contains statistics about operating systems used by selected nodes. -+ -.Properties of `names` -[%collapsible%open] -====== - -`name`::: -(string) Name of an operating system used by one or more selected nodes. - -`count`::: -(string) Number of selected nodes using the operating system. - -====== - -`pretty_names`:: -(array of objects) Contains statistics about operating systems used by selected nodes. -+ -.Properties of `pretty_names` -[%collapsible%open] -====== - -`pretty_name`::: -(string) Human-readable name of an operating system used by one or more selected nodes. - -`count`::: -(string) Number of selected nodes using the operating system. - -====== - -`architectures`:: -(array of objects) Contains statistics about processor architectures (for example, x86_64 or aarch64) used by selected nodes. -+ -.Properties of `architectures` -[%collapsible%open] -====== - -`arch`::: -(string) Name of an architecture used by one or more selected nodes. - -`count`::: -(string) Number of selected nodes using the architecture. 
- -====== - -`mem`:: -(object) Contains statistics about memory used by selected nodes. -+ -.Properties of `mem` -[%collapsible%open] -====== - -`total`:: -(<>) Total amount of physical memory across all selected nodes. - -`total_in_bytes`:: -(integer) Total amount, in bytes, of physical memory across all selected nodes. - -`adjusted_total`:: -(<>) Total amount of memory across all selected nodes, but using the value specified using the `es.total_memory_bytes` system property instead of measured total memory for those nodes where that system property was set. - -`adjusted_total_in_bytes`:: -(integer) Total amount, in bytes, of memory across all selected nodes, but using the value specified using the `es.total_memory_bytes` system property instead of measured total memory for those nodes where that system property was set. - -`free`:: -(<>) Amount of free physical memory across all selected nodes. - -`free_in_bytes`:: -(integer) Amount, in bytes, of free physical memory across all selected nodes. - -`used`:: -(<>) Amount of physical memory in use across all selected nodes. - -`used_in_bytes`:: -(integer) Amount, in bytes, of physical memory in use across all selected nodes. - -`free_percent`:: -(integer) Percentage of free physical memory across all selected nodes. - -`used_percent`:: -(integer) Percentage of physical memory in use across all selected nodes. - -====== -===== - -`process`:: -(object) Contains statistics about processes used by selected nodes. -+ -.Properties of `process` -[%collapsible%open] -===== - -`cpu`:: -(object) Contains statistics about CPU used by selected nodes. -+ -.Properties of `cpu` -[%collapsible%open] -====== - -`percent`:: -(integer) Percentage of CPU used across all selected nodes. -Returns `-1` if not supported. - -====== - -`open_file_descriptors`:: -(object) Contains statistics about open file descriptors in selected nodes. -+ -.Properties of `open_file_descriptors` -[%collapsible%open] -====== - -`min`:: -(integer) Minimum number of concurrently open file descriptors across all selected nodes. -Returns `-1` if not supported. - -`max`:: -(integer) Maximum number of concurrently open file descriptors allowed across all selected nodes. -Returns `-1` if not supported. - -`avg`:: -(integer) Average number of concurrently open file descriptors. -Returns `-1` if not supported. - -====== -===== - -`jvm`:: -(object) Contains statistics about the Java Virtual Machines (JVMs) used by selected nodes. -+ -.Properties of `jvm` -[%collapsible%open] -===== - -`max_uptime`:: -(<>) Uptime duration since JVM last started. - -`max_uptime_in_millis`:: -(integer) Uptime duration, in milliseconds, since JVM last started. - -`versions`:: -(array of objects) Contains statistics about the JVM versions used by selected nodes. -+ -.Properties of `versions` -[%collapsible%open] -====== - -`version`:: -(string) Version of JVM used by one or more selected nodes. - -`vm_name`:: -(string) Name of the JVM. - -`vm_version`:: -(string) Full version number of JVM. -+ -The full version number includes a plus sign (`+`) followed by the build number. - -`vm_vendor`:: -(string) Vendor of the JVM. - -`bundled_jdk`:: -(Boolean) Always `true`. -All distributions come with a bundled Java Development Kit (JDK). - -`using_bundled_jdk`:: -(Boolean) If `true`, a bundled JDK is in use by JVM. - -`count`:: -(integer) Total number of selected nodes using JVM. - -====== - -`mem`:: -(object) Contains statistics about memory used by selected nodes. 
-+ -.Properties of `mem` -[%collapsible%open] -====== - -`heap_used`:: -(<>) Memory currently in use by the heap across all selected nodes. - -`heap_used_in_bytes`:: -(integer) Memory, in bytes, currently in use by the heap across all selected nodes. - -`heap_max`:: -(<>) Maximum amount of memory, in bytes, available for use by the heap across all selected nodes. - -`heap_max_in_bytes`:: -(integer) Maximum amount of memory, in bytes, available for use by the heap across all selected nodes. - -====== - -`threads`:: -(integer) Number of active threads in use by JVM across all selected nodes. - -===== - -`fs`:: -(object) Contains statistics about file stores by selected nodes. -+ -.Properties of `fs` -[%collapsible%open] -===== - -`total`:: -(<>) Total size of all file stores across all selected nodes. - -`total_in_bytes`:: -(integer) Total size, in bytes, of all file stores across all selected nodes. - -`free`:: -(<>) Amount of unallocated disk space in file stores across all selected nodes. - -`free_in_bytes`:: -(integer) Total number of unallocated bytes in file stores across all selected nodes. - -`available`:: -(<>) Total amount of disk space available to JVM in file stores across all selected nodes. -+ -Depending on OS or process-level restrictions, this amount may be less than -`nodes.fs.free`. -This is the actual amount of free disk space the selected {es} -nodes can use. - -`available_in_bytes`:: -(integer) Total number of bytes available to JVM in file stores across all selected nodes. -+ -Depending on OS or process-level restrictions, this number may be less than -`nodes.fs.free_in_byes`. -This is the actual amount of free disk space the selected {es} nodes can use. - -===== - -`plugins`:: -(array of objects) Contains statistics about installed plugins and modules by selected nodes. -+ -If no plugins or modules are installed, this array is empty. -+ -.Properties of `plugins` -[%collapsible%open] -===== - -``:: -(object) Contains statistics about an installed plugin or module. -+ -.Properties of `` -[%collapsible%open] -====== - -`name`::: -(string) Name of the {es} plugin. - -`version`::: -(string) -{es} version for which the plugin was built. - -`elasticsearch_version`::: -(string) -{es} version for which the plugin was built. - -`java_version`::: -(string) Java version for which the plugin was built. - -`description`::: -(string) Short description of the plugin. - -`classname`::: -(string) Class name used as the plugin's entry point. - -`extended_plugins`::: -(array of strings) An array of other plugins extended by this plugin through the Java Service Provider Interface (SPI). -+ -If this plugin extends no other plugins, this array is empty. - -`has_native_controller`::: -(Boolean) If `true`, the plugin has a native controller process. - -====== - -===== - -`network_types`:: -(object) Contains statistics about the transport and HTTP networks used by selected nodes. -+ -.Properties of `network_types` -[%collapsible%open] -===== - -`transport_types`:: -(object) Contains statistics about the transport network types used by selected nodes. -+ -.Properties of `transport_types` -[%collapsible%open] -====== - -``:: -(integer) Number of selected nodes using the transport type. - -====== - -`http_types`:: -(object) Contains statistics about the HTTP network types used by selected nodes. -+ -.Properties of `http_types` -[%collapsible%open] -====== - -``:: -(integer) Number of selected nodes using the HTTP type. 
- -====== -===== - -`discovery_types`:: -(object) Contains statistics about the <> used by selected nodes. -+ -.Properties of `discovery_types` -[%collapsible%open] -===== - -``:: -(integer) Number of selected nodes using the <> to find other nodes. - -===== - -`packaging_types`:: -(array of objects) Contains statistics about {es} distributions installed on selected nodes. -+ -.Properties of `packaging_types` -[%collapsible%open] -===== - -`flavor`::: -(string) Type of {es} distribution. -This is always `default`. - -`type`::: -(string) File type, such as `tar` or `zip`, used for the distribution package. - -`count`::: -(integer) Number of selected nodes using the distribution flavor and file type. - -===== - -==== - -`snapshots`:: -(object) Contains statistics about the <> activity in the cluster. -+ -.Properties of `snapshots` -[%collapsible%open] -===== - -`current_counts`::: -(object) Contains statistics which report the numbers of various ongoing snapshot activities in the cluster. -+ -.Properties of `current_counts` -[%collapsible%open] -====== - -`snapshots`::: -(integer) The total number of snapshots and clones currently being created by the cluster. - -`shard_snapshots`::: -(integer) The total number of outstanding shard snapshots in the cluster. - -`snapshot_deletions`::: -(integer) The total number of snapshot deletion operations that the cluster is currently running. - -`concurrent_operations`::: -(integer) The total number of snapshot operations that the cluster is currently running concurrently. -This is the total of the `snapshots` and `snapshot_deletions` -entries, and is limited by <>. - -`cleanups`::: -(integer) The total number of repository cleanup operations that the cluster is currently running. -These operations do not count towards the total number of concurrent operations. - -====== - - -`repositories`::: -(object) Contains statistics which report the progress of snapshot activities broken down by repository. -This object contains one entry for each repository registered with the cluster. -+ -.Properties of `repositories` -[%collapsible%open] -====== - -`current_counts`::: -(object) Contains statistics which report the numbers of various ongoing snapshot activities for this repository. -+ -.Properties of `current_counts` -[%collapsible%open] -======= - -`snapshots`::: -(integer) The total number of ongoing snapshots in this repository. - -`clones`::: -(integer) The total number of ongoing snapshot clones in this repository. - -`finalizations`::: -(integer) The total number of this repository's ongoing snapshots and clone operations which are mostly complete except for their last "finalization" step. - -`deletions`::: -(integer) The total number of ongoing snapshot deletion operations in this repository. - -`snapshot_deletions`::: -(integer) The total number of snapshots that are currently being deleted from this repository. - -`active_deletions`::: -(integer) The total number of ongoing snapshot deletion operations which are currently active in this repository. -Snapshot deletions do not run concurrently with other snapshot operations, so this may be `0` if any pending deletes are waiting for other operations to finish. - -`shards`::: -(object) Contains statistics which report the shard-level progress of ongoing snapshot activities for a repository. -Note that these statistics relate only to ongoing snapshots. -+ -.Properties of `shards` -[%collapsible%open] -======== - -`total`::: -(integer) The total number of shard snapshots currently tracked by this repository. 
-This statistic only counts shards in ongoing snapshots, so it will drop when a snapshot completes and will be `0` if there are no ongoing snapshots. - -`complete`::: -(integer) The total number of tracked shard snapshots which have completed in this repository. -This statistic only counts shards in ongoing snapshots, so it will drop when a snapshot completes and will be `0` if there are no ongoing snapshots. - -`incomplete`::: -(integer) The total number of tracked shard snapshots which have not completed in this repository. -This is the difference between the `total` and `complete` values. - -`states`::: -(object) The total number of shard snapshots in each of the named states in this repository. -These states are an implementation detail of the snapshotting process which may change between versions. -They are included here for expert users, but should otherwise be ignored. - -======== - -======= - -`oldest_start_time`::: -(string) The start time of the oldest running snapshot in this repository. - -`oldest_start_time_in_millis`::: -(integer) The start time of the oldest running snapshot in this repository, represented as milliseconds since the Unix epoch. - -====== - -===== - -==== - -`repositories`:: -(object) Contains statistics about the <> repositories defined in the cluster, broken down by repository type. -+ -.Properties of `repositories` -[%collapsible%open] -===== - -`count`::: -(integer) The number of repositories of this type in the cluster. - -`read_only`::: -(integer) The number of repositories of this type in the cluster which are registered read-only. - -`read_write`::: -(integer) The number of repositories of this type in the cluster which are not registered as read-only. - -Each repository type may also include other statistics about the repositories of that type here. - -===== - -==== - -`ccs`:: -(object) Contains information relating to <> settings and activity in the cluster. -+ -.Properties of `ccs` -[%collapsible%open] -===== - -`clusters`::: -(object) Contains remote cluster settings and metrics collected from them. -The keys are cluster names, and the values are per-cluster data. -Only present if `include_remotes` option is set to `true`. - -+ -.Properties of `clusters` -[%collapsible%open] -====== - -`cluster_uuid`::: -(string) The UUID of the remote cluster. - -`mode`::: -(string) The <> used to communicate with the remote cluster. - -`skip_unavailable`::: -(Boolean) The `skip_unavailable` <> used for this remote cluster. - -`transport.compress`::: -(string) Transport compression setting used for this remote cluster. - -`version`::: -(array of strings) The list of {es} versions used by the nodes on the remote cluster. - -`status`::: -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cluster-health-status] -+ -See <>. - -`nodes_count`::: -(integer) The total count of nodes in the remote cluster. - -`shards_count`::: -(integer) The total number of shards in the remote cluster. - -`indices_count`::: -(integer) The total number of indices in the remote cluster. - -`indices_total_size_in_bytes`::: -(integer) Total data set size, in bytes, of all shards assigned to selected nodes. - -`indices_total_size`::: -(string) Total data set size, in bytes, of all shards assigned to selected nodes, as a human-readable string. - -`max_heap_in_bytes`::: -(integer) Maximum amount of memory, in bytes, available for use by the heap across the nodes of the remote cluster. 
- -`max_heap`::: -(string) Maximum amount of memory, in bytes, available for use by the heap across the nodes of the remote cluster, -as a human-readable string. - -`mem_total_in_bytes`::: -(integer) Total amount, in bytes, of physical memory across the nodes of the remote cluster. - -`mem_total`::: -(string) Total amount, in bytes, of physical memory across the nodes of the remote cluster, as a human-readable string. - -====== - - -`_search`::: -(object) Contains information about <> usage. -+ -.Properties of `_search` -[%collapsible%open] -====== - -`total`::: -(integer) The total number of {ccs} requests that have been executed by the cluster. - -`success`::: -(integer) The total number of {ccs} requests that have been successfully executed by the cluster. - -`skipped`::: -(integer) The total number of {ccs} requests (successful or failed) that had at least one remote cluster skipped. - -`took`::: -(object) Contains statistics about the time taken to execute {ccs} requests. -+ -.Properties of `took` -[%collapsible%open] -======= - -`max`::: -(integer) The maximum time taken to execute a {ccs} request, in milliseconds. - -`avg`::: -(integer) The median time taken to execute a {ccs} request, in milliseconds. - -`p90`::: -(integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. - -======= - -`took_mrt_true`:: -(object) Contains statistics about the time taken to execute {ccs} requests for which the -<> setting was set to `true`. -+ -.Properties of `took_mrt_true` -[%collapsible%open] -======= -`max`::: -(integer) The maximum time taken to execute a {ccs} request, in milliseconds. - -`avg`::: -(integer) The median time taken to execute a {ccs} request, in milliseconds. - -`p90`::: -(integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. - -======= - -`took_mrt_false`:: -(object) Contains statistics about the time taken to execute {ccs} requests for which the -<> setting was set to `false`. -+ -.Properties of `took_mrt_false` -[%collapsible%open] -======= -`max`::: -(integer) The maximum time taken to execute a {ccs} request, in milliseconds. - -`avg`::: -(integer) The median time taken to execute a {ccs} request, in milliseconds. - -`p90`::: -(integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. - -======= - -`remotes_per_search_max`:: -(integer) The maximum number of remote clusters that were queried in a single {ccs} request. - -`remotes_per_search_avg`:: -(float) The average number of remote clusters that were queried in a single {ccs} request. - -`failure_reasons`:: -(object) Contains statistics about the reasons for {ccs} request failures. -The keys are the failure reason names and the values are the number of requests that failed for that reason. - -`features`:: -(object) Contains statistics about the features used in {ccs} requests. -The keys are the names of the search feature, and the values are the number of requests that used that feature. -Single request can use more than one feature (e.g. both `async` and `wildcard`). -Known features are: - -* `async` - <> - -* `mrt` - <> setting was set to `true`. - -* `wildcard` - <> for indices with wildcards was used in the search request. - -`clients`:: -(object) Contains statistics about the clients that executed {ccs} requests. -The keys are the names of the clients, and the values are the number of requests that were executed by that client. -Only known clients (such as `kibana` or `elasticsearch`) are counted. 
- -`clusters`:: -(object) Contains statistics about the clusters that were queried in {ccs} requests. -The keys are cluster names, and the values are per-cluster telemetry data. -This also includes the local cluster itself, which uses the name `(local)`. -+ -.Properties of per-cluster data: -[%collapsible%open] -======= -`total`::: -(integer) The total number of successful (not skipped) {ccs} requests that were executed against this cluster. -This may include requests where partial results were returned, but not requests in which the cluster has been skipped entirely. - -`skipped`::: -(integer) The total number of {ccs} requests for which this cluster was skipped. - -`took`::: -(object) Contains statistics about the time taken to execute requests against this cluster. -+ -.Properties of `took` -[%collapsible%open] -======== - -`max`::: -(integer) The maximum time taken to execute a {ccs} request, in milliseconds. - -`avg`::: -(integer) The median time taken to execute a {ccs} request, in milliseconds. - -`p90`::: -(integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. - -======== - -======= - - -====== -`_esql`::: -(object) Contains information about <> usage. -The structure of the object is the same as the `_search` object above. - -===== - -[[cluster-stats-api-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET /_cluster/stats?human&pretty --------------------------------------------------- -// TEST[setup:my_index] - -The API returns the following response: - -["source","js",subs="attributes,callouts"] --------------------------------------------------- -{ - "_nodes" : { - "total" : 1, - "successful" : 1, - "failed" : 0 - }, - "cluster_uuid": "YjAvIhsCQ9CbjWZb2qJw3Q", - "cluster_name": "elasticsearch", - "timestamp": 1459427693515, - "status": "green", - "indices": { - "count": 1, - "shards": { - "total": 5, - "primaries": 5, - "replication": 0, - "index": { - "shards": { - "min": 5, - "max": 5, - "avg": 5 - }, - "primaries": { - "min": 5, - "max": 5, - "avg": 5 - }, - "replication": { - "min": 0, - "max": 0, - "avg": 0 - } - } - }, - "docs": { - "count": 10, - "deleted": 0, - "total_size": "8.6kb", - "total_size_in_bytes": 8833 - }, - "store": { - "size": "16.2kb", - "size_in_bytes": 16684, - "total_data_set_size": "16.2kb", - "total_data_set_size_in_bytes": 16684, - "reserved": "0b", - "reserved_in_bytes": 0 - }, - "search": { - ... 
- }, - "fielddata": { - "memory_size": "0b", - "memory_size_in_bytes": 0, - "evictions": 0, - "global_ordinals": { - "build_time" : "0s", - "build_time_in_millis" : 0 - } - }, - "query_cache": { - "memory_size": "0b", - "memory_size_in_bytes": 0, - "total_count": 0, - "hit_count": 0, - "miss_count": 0, - "cache_size": 0, - "cache_count": 0, - "evictions": 0 - }, - "completion": { - "size": "0b", - "size_in_bytes": 0 - }, - "segments": { - "count": 4, - "memory": "8.6kb", - "memory_in_bytes": 0, - "terms_memory": "0b", - "terms_memory_in_bytes": 0, - "stored_fields_memory": "0b", - "stored_fields_memory_in_bytes": 0, - "term_vectors_memory": "0b", - "term_vectors_memory_in_bytes": 0, - "norms_memory": "0b", - "norms_memory_in_bytes": 0, - "points_memory" : "0b", - "points_memory_in_bytes" : 0, - "doc_values_memory": "0b", - "doc_values_memory_in_bytes": 0, - "index_writer_memory": "0b", - "index_writer_memory_in_bytes": 0, - "version_map_memory": "0b", - "version_map_memory_in_bytes": 0, - "fixed_bit_set": "0b", - "fixed_bit_set_memory_in_bytes": 0, - "max_unsafe_auto_id_timestamp" : -9223372036854775808, - "file_sizes": {} - }, - "mappings": { - "total_field_count": 0, - "total_deduplicated_field_count": 0, - "total_deduplicated_mapping_size": "0b", - "total_deduplicated_mapping_size_in_bytes": 0, - "field_types": [], - "runtime_field_types": [], - "source_modes" : { - "stored": 0 - } - }, - "analysis": { - "char_filter_types": [], - "tokenizer_types": [], - "filter_types": [], - "analyzer_types": [], - "built_in_char_filters": [], - "built_in_tokenizers": [], - "built_in_filters": [], - "built_in_analyzers": [], - "synonyms": {} - }, - "versions": [ - { - "version": "8.0.0", - "index_count": 1, - "primary_shard_count": 1, - "total_primary_size": "7.4kb", - "total_primary_bytes": 7632 - } - ], - "dense_vector": { - "value_count": 0 - }, - "sparse_vector": { - "value_count": 0 - } - }, - "nodes": { - "count": { - "total": 1, - "data": 1, - "coordinating_only": 0, - "master": 1, - "ingest": 1, - "voting_only": 0 - }, - "versions": [ - "{version}" - ], - "os": { - "available_processors": 8, - "allocated_processors": 8, - "names": [ - { - "name": "Mac OS X", - "count": 1 - } - ], - "pretty_names": [ - { - "pretty_name": "Mac OS X", - "count": 1 - } - ], - "architectures": [ - { - "arch": "x86_64", - "count": 1 - } - ], - "mem" : { - "total" : "16gb", - "total_in_bytes" : 17179869184, - "adjusted_total" : "16gb", - "adjusted_total_in_bytes" : 17179869184, - "free" : "78.1mb", - "free_in_bytes" : 81960960, - "used" : "15.9gb", - "used_in_bytes" : 17097908224, - "free_percent" : 0, - "used_percent" : 100 - } - }, - "process": { - "cpu": { - "percent": 9 - }, - "open_file_descriptors": { - "min": 268, - "max": 268, - "avg": 268 - } - }, - "jvm": { - "max_uptime": "13.7s", - "max_uptime_in_millis": 13737, - "versions": [ - { - "version": "12", - "vm_name": "OpenJDK 64-Bit Server VM", - "vm_version": "12+33", - "vm_vendor": "Oracle Corporation", - "bundled_jdk": true, - "using_bundled_jdk": true, - "count": 1 - } - ], - "mem": { - "heap_used": "57.5mb", - "heap_used_in_bytes": 60312664, - "heap_max": "989.8mb", - "heap_max_in_bytes": 1037959168 - }, - "threads": 90 - }, - "fs": { - "total": "200.6gb", - "total_in_bytes": 215429193728, - "free": "32.6gb", - "free_in_bytes": 35064553472, - "available": "32.4gb", - "available_in_bytes": 34802409472 - }, - "plugins": [ - { - "name": "analysis-icu", - "version": "{version}", - "description": "The ICU Analysis plugin integrates Lucene ICU module into 
elasticsearch, adding ICU relates analysis components.", - "classname": "org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin", - "has_native_controller": false - }, - ... - ], - "ingest": { - "number_of_pipelines" : 1, - "processor_stats": { - ... - } - }, - "indexing_pressure": { - "memory": { - "current": { - "combined_coordinating_and_primary": "0b", - "combined_coordinating_and_primary_in_bytes": 0, - "coordinating": "0b", - "coordinating_in_bytes": 0, - "primary": "0b", - "primary_in_bytes": 0, - "replica": "0b", - "replica_in_bytes": 0, - "all": "0b", - "all_in_bytes": 0 - }, - "total": { - "combined_coordinating_and_primary": "0b", - "combined_coordinating_and_primary_in_bytes": 0, - "coordinating": "0b", - "coordinating_in_bytes": 0, - "primary": "0b", - "primary_in_bytes": 0, - "replica": "0b", - "replica_in_bytes": 0, - "all": "0b", - "all_in_bytes": 0, - "coordinating_rejections": 0, - "primary_rejections": 0, - "replica_rejections": 0, - "primary_document_rejections": 0 - }, - "limit" : "0b", - "limit_in_bytes": 0 - } - }, - "network_types": { - ... - }, - "discovery_types": { - ... - }, - "packaging_types": [ - { - ... - } - ] - }, - "snapshots": { - ... - }, - "repositories": { - ... - }, - "ccs": { - "_search": { - "total": 7, - "success": 7, - "skipped": 0, - "took": { - "max": 36, - "avg": 20, - "p90": 33 - }, - "took_mrt_true": { - "max": 33, - "avg": 15, - "p90": 33 - }, - "took_mrt_false": { - "max": 36, - "avg": 26, - "p90": 36 - }, - "remotes_per_search_max": 3, - "remotes_per_search_avg": 2.0, - "failure_reasons": { ... }, - "features": { ... }, - "clients": { ... }, - "clusters": { ... } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"plugins": \[[^\]]*\]/"plugins": $body.$_path/] -// TESTRESPONSE[s/"network_types": \{[^\}]*\}/"network_types": $body.$_path/] -// TESTRESPONSE[s/"discovery_types": \{[^\}]*\}/"discovery_types": $body.$_path/] -// TESTRESPONSE[s/"processor_stats": \{[^\}]*\}/"processor_stats": $body.$_path/] -// TESTRESPONSE[s/"count": \{[^\}]*\}/"count": $body.$_path/] -// TESTRESPONSE[s/"packaging_types": \[[^\]]*\]/"packaging_types": $body.$_path/] -// TESTRESPONSE[s/"snapshots": \{[^\}]*\}/"snapshots": $body.$_path/] -// TESTRESPONSE[s/"repositories": \{[^\}]*\}/"repositories": $body.$_path/] -// TESTRESPONSE[s/"clusters": \{[^\}]*\}/"clusters": $body.$_path/] -// TESTRESPONSE[s/"features": \{[^\}]*\}/"features": $body.$_path/] -// TESTRESPONSE[s/"clients": \{[^\}]*\}/"clients": $body.$_path/] -// TESTRESPONSE[s/"failure_reasons": \{[^\}]*\}/"failure_reasons": $body.$_path/] -// TESTRESPONSE[s/"field_types": \[[^\]]*\]/"field_types": $body.$_path/] -// TESTRESPONSE[s/"runtime_field_types": \[[^\]]*\]/"runtime_field_types": $body.$_path/] -// TESTRESPONSE[s/"search": \{[^\}]*\}/"search": $body.$_path/] -// TESTRESPONSE[s/"remotes_per_search_avg": [.0-9]+/"remotes_per_search_avg": $body.$_path/] -// TESTRESPONSE[s/: (true|false)/: $body.$_path/] -// TESTRESPONSE[s/: (\-)?[0-9]+/: $body.$_path/] -// TESTRESPONSE[s/: "[^"]*"/: $body.$_path/] -// These replacements do a few things: -// 1. Ignore the contents of the `plugins` object because we don't know all of -// the plugins that will be in it. And because we figure folks don't need to -// see an exhaustive list anyway. -// 2. Similarly, ignore the contents of `network_types`, `discovery_types`, -// `packaging_types`, `snapshots` and `repositories`. -// 3. Ignore the contents of the (nodes) count object, as what's shown here -// depends on the license. 
Voting-only nodes are e.g. only shown when this -// test runs with a basic license. -// 4. All of the numbers and strings on the right hand side of *every* field in -// the response are ignored. So we're really only asserting things about the -// the shape of this response, not the values in it. - -This API can be restricted to a subset of the nodes using <>: - -[source,console] --------------------------------------------------- -GET /_cluster/stats/nodes/node1,node*,master:false --------------------------------------------------- - -This API call will return data about the remote clusters if any are configured: - -[source,console] --------------------------------------------------- -GET /_cluster/stats?include_remotes=true --------------------------------------------------- - -The resulting response will contain the `ccs` object with information about the remote clusters: - -[source,js] --------------------------------------------------- -{ - "ccs": { - "clusters": { - "remote_cluster": { - "cluster_uuid": "YjAvIhsCQ9CbjWZb2qJw3Q", - "mode": "sniff", - "skip_unavailable": false, - "transport.compress": "true", - "version": ["8.16.0"], - "status": "green", - "nodes_count": 10, - "shards_count": 420, - "indices_count": 10, - "indices_total_size_in_bytes": 6232658362, - "max_heap_in_bytes": 1037959168, - "mem_total_in_bytes": 137438953472 - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[skip:TODO] diff --git a/docs/reference/cluster/tasks.asciidoc b/docs/reference/cluster/tasks.asciidoc deleted file mode 100644 index 79727d9a3078b..0000000000000 --- a/docs/reference/cluster/tasks.asciidoc +++ /dev/null @@ -1,345 +0,0 @@ -[[tasks]] -=== Task management API -++++ -Task management -++++ - -beta::["The task management API is new and should still be considered a beta feature. The API may change in ways that are not backwards compatible.",{es-issue}51628] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-tasks[task management APIs]. --- - -Returns information about the tasks currently executing in the cluster. - -[[tasks-api-request]] -==== {api-request-title} - -`GET /_tasks/` - -`GET /_tasks` - -[[tasks-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[tasks-api-desc]] -==== {api-description-title} - -The task management API returns information -about tasks currently executing -on one or more nodes in the cluster. - - -[[tasks-api-path-params]] -==== {api-path-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=task-id] - - -[[tasks-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=actions] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=detailed] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=group-by] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=nodes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=parent-task-id] - -`timeout`:: -(Optional, <>) -Period to wait for each node to respond. If a node does not respond before its -timeout expires, the response does not include its information. However, timed out -nodes are included in the response's `node_failures` property. Defaults to `30s`. - -`wait_for_completion`:: -(Optional, Boolean) If `true`, the request blocks until all found tasks are complete. -Defaults to `false`. 
- -[[tasks-api-response-codes]] -==== {api-response-codes-title} - -// tag::tasks-api-404[] -`404` (Missing resources):: -If `` is specified but not found, this code indicates that there -are no resources that match the request. -// end::tasks-api-404[] - -[[tasks-api-examples]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -GET _tasks <1> -GET _tasks?nodes=nodeId1,nodeId2 <2> -GET _tasks?nodes=nodeId1,nodeId2&actions=cluster:* <3> --------------------------------------------------- -// TEST[skip:No tasks to retrieve] - -<1> Retrieves all tasks currently running on all nodes in the cluster. -<2> Retrieves all tasks running on nodes `nodeId1` and `nodeId2`. See <> for more info about how to select individual nodes. -<3> Retrieves all cluster-related tasks running on nodes `nodeId1` and `nodeId2`. - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "nodes" : { - "oTUltX4IQMOUUVeiohTt8A" : { - "name" : "H5dfFeA", - "transport_address" : "127.0.0.1:9300", - "host" : "127.0.0.1", - "ip" : "127.0.0.1:9300", - "tasks" : { - "oTUltX4IQMOUUVeiohTt8A:124" : { - "node" : "oTUltX4IQMOUUVeiohTt8A", - "id" : 124, - "type" : "direct", - "action" : "cluster:monitor/tasks/lists[n]", - "start_time_in_millis" : 1458585884904, - "running_time_in_nanos" : 47402, - "cancellable" : false, - "parent_task_id" : "oTUltX4IQMOUUVeiohTt8A:123" - }, - "oTUltX4IQMOUUVeiohTt8A:123" : { - "node" : "oTUltX4IQMOUUVeiohTt8A", - "id" : 123, - "type" : "transport", - "action" : "cluster:monitor/tasks/lists", - "start_time_in_millis" : 1458585884904, - "running_time_in_nanos" : 236042, - "cancellable" : false - } - } - } - } -} --------------------------------------------------- - -===== Retrieve information from a particular task - -It is also possible to retrieve information for a particular task. The following -example retrieves information about task `oTUltX4IQMOUUVeiohTt8A:124`: - -[source,console] --------------------------------------------------- -GET _tasks/oTUltX4IQMOUUVeiohTt8A:124 --------------------------------------------------- -// TEST[catch:missing] - -If the task isn't found, the API returns a 404. - -To retrieve all children of a particular task: - -[source,console] --------------------------------------------------- -GET _tasks?parent_task_id=oTUltX4IQMOUUVeiohTt8A:123 --------------------------------------------------- - -If the parent isn't found, the API does not return a 404. - - -===== Get more information about tasks - -You can also use the `detailed` request parameter to get more information about -the running tasks. This is useful to distinguish tasks from each other but -is more costly to execute. 
For example, fetching all searches using the `detailed` -request parameter: - -[source,console] --------------------------------------------------- -GET _tasks?actions=*search&detailed --------------------------------------------------- -// TEST[skip:No tasks to retrieve] - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "nodes" : { - "oTUltX4IQMOUUVeiohTt8A" : { - "name" : "H5dfFeA", - "transport_address" : "127.0.0.1:9300", - "host" : "127.0.0.1", - "ip" : "127.0.0.1:9300", - "tasks" : { - "oTUltX4IQMOUUVeiohTt8A:464" : { - "node" : "oTUltX4IQMOUUVeiohTt8A", - "id" : 464, - "type" : "transport", - "action" : "indices:data/read/search", - "description" : "indices[test], types[test], search_type[QUERY_THEN_FETCH], source[{\"query\":...}]", - "start_time_in_millis" : 1483478610008, - "running_time_in_nanos" : 13991383, - "cancellable" : true, - "cancelled" : false - } - } - } - } -} --------------------------------------------------- - -The new `description` field contains human readable text that identifies the -particular request that the task is performing such as identifying the search -request being performed by a search task like the example above. Other kinds of -tasks have different descriptions, like <> which -has the source and the destination, or <> which just has the -number of requests and the destination indices. Many requests will only have an -empty description because more detailed information about the request is not -easily available or particularly helpful in identifying the request. - -[IMPORTANT] -============================== - -`_tasks` requests with `detailed` may also return a `status`. This is a report -of the internal status of the task. As such its format varies from task to task. -While we try to keep the `status` for a particular task consistent from version -to version this isn't always possible because we sometimes change the -implementation. In that case we might remove fields from the `status` for a -particular request so any parsing you do of the status might break in minor -releases. - -============================== - - -===== Wait for completion - -The task API can also be used to wait for completion of a particular task. The -following call will block for 10 seconds or until the task with id -`oTUltX4IQMOUUVeiohTt8A:12345` is completed. - -[source,console] --------------------------------------------------- -GET _tasks/oTUltX4IQMOUUVeiohTt8A:12345?wait_for_completion=true&timeout=10s --------------------------------------------------- -// TEST[catch:missing] - -You can also wait for all tasks for certain action types to finish. This command -will wait for all `reindex` tasks to finish: - -[source,console] --------------------------------------------------- -GET _tasks?actions=*reindex&wait_for_completion=true&timeout=10s --------------------------------------------------- - -[[task-cancellation]] -===== Task Cancellation - -If a long-running task supports cancellation, it can be cancelled with the cancel -tasks API. The following example cancels task `oTUltX4IQMOUUVeiohTt8A:12345`: - -[source,console] --------------------------------------------------- -POST _tasks/oTUltX4IQMOUUVeiohTt8A:12345/_cancel --------------------------------------------------- - -The task cancellation command supports the same task selection parameters as the -list tasks command, so multiple tasks can be cancelled at the same time. 
For -example, the following command will cancel all reindex tasks running on the -nodes `nodeId1` and `nodeId2`. - -[source,console] --------------------------------------------------- -POST _tasks/_cancel?nodes=nodeId1,nodeId2&actions=*reindex --------------------------------------------------- - -A task may continue to run for some time after it has been cancelled because it -may not be able to safely stop its current activity straight away, or because -{es} must complete its work on other tasks before it can process the -cancellation. The list tasks API will continue to list these cancelled tasks -until they complete. The `cancelled` flag in the response to the list tasks API -indicates that the cancellation command has been processed and the task will -stop as soon as possible. To troubleshoot why a cancelled task does not -complete promptly, use the list tasks API with the `?detailed` parameter to -identify the other tasks the system is running and also use the -<> API to obtain detailed information about the work -the system is doing instead of completing the cancelled task. - -===== Task Grouping - -The task lists returned by task API commands can be grouped either by nodes -(default) or by parent tasks using the `group_by` parameter. The following -command will change the grouping to parent tasks: - -[source,console] --------------------------------------------------- -GET _tasks?group_by=parents --------------------------------------------------- - -The grouping can be disabled by specifying `none` as a `group_by` parameter: - -[source,console] --------------------------------------------------- -GET _tasks?group_by=none --------------------------------------------------- - - -===== Identifying running tasks - -The `X-Opaque-Id` header, when provided on the HTTP request header, is going to -be returned as a header in the response as well as in the `headers` field for in -the task information. 
This allows to track certain calls, or associate certain -tasks with the client that started them: - -[source,sh] --------------------------------------------------- -curl -i -H "X-Opaque-Id: 123456" "http://localhost:9200/_tasks?group_by=parents" --------------------------------------------------- -//NOTCONSOLE - -The API returns the following result: - -[source,js] --------------------------------------------------- -HTTP/1.1 200 OK -X-Opaque-Id: 123456 <1> -content-type: application/json; charset=UTF-8 -content-length: 831 - -{ - "tasks" : { - "u5lcZHqcQhu-rUoFaqDphA:45" : { - "node" : "u5lcZHqcQhu-rUoFaqDphA", - "id" : 45, - "type" : "transport", - "action" : "cluster:monitor/tasks/lists", - "start_time_in_millis" : 1513823752749, - "running_time_in_nanos" : 293139, - "cancellable" : false, - "headers" : { - "X-Opaque-Id" : "123456" <2> - }, - "children" : [ - { - "node" : "u5lcZHqcQhu-rUoFaqDphA", - "id" : 46, - "type" : "direct", - "action" : "cluster:monitor/tasks/lists[n]", - "start_time_in_millis" : 1513823752750, - "running_time_in_nanos" : 92133, - "cancellable" : false, - "parent_task_id" : "u5lcZHqcQhu-rUoFaqDphA:45", - "headers" : { - "X-Opaque-Id" : "123456" <3> - } - } - ] - } - } -} --------------------------------------------------- -//NOTCONSOLE -<1> id as a part of the response header -<2> id for the tasks that was initiated by the REST request -<3> the child task of the task initiated by the REST request diff --git a/docs/reference/cluster/update-desired-nodes.asciidoc b/docs/reference/cluster/update-desired-nodes.asciidoc deleted file mode 100644 index f83f551395134..0000000000000 --- a/docs/reference/cluster/update-desired-nodes.asciidoc +++ /dev/null @@ -1,146 +0,0 @@ -[[update-desired-nodes]] -=== Create or update desired nodes API -++++ -Create or update desired nodes -++++ - -NOTE: {cloud-only} - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Creates or updates the desired nodes. - -[[update-desired-nodes-request]] -==== {api-request-title} - -[source,console] --------------------------------------------------- -PUT /_internal/desired_nodes// -{ - "nodes" : [ - { - "settings" : { - "node.name" : "instance-000187", - "node.external_id": "instance-000187", - "node.roles" : ["data_hot", "master"], - "node.attr.data" : "hot", - "node.attr.logical_availability_zone" : "zone-0" - }, - "processors" : 8.0, - "memory" : "58gb", - "storage" : "2tb" - } - ] -} --------------------------------------------------- -// TEST[s//test/] -// TEST[s//1/] - -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_internal/desired_nodes --------------------------------------------------- -// TEST[continued] - -////////////////////////// - -[[update-desired-nodes-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -`dry_run`:: - (Optional, Boolean) If `true`, then the request simulates the update and - returns a response with `dry_run` field set to `true`. - -[[update-desired-nodes-desc]] -==== {api-description-title} - -This API creates or update the desired nodes. External orchestrators can use -this API to let Elasticsearch know about the cluster topology, including future -changes such as adding or removing nodes. Using this information, the system is -able to take better decisions. 
- -It's possible to run the update in "dry run" mode by adding the -`?dry_run` query parameter. This will validate the request result, but will not actually perform the update. - -[[update-desired-nodes-examples]] -==== {api-examples-title} - -In this example, a new version for the desired nodes with history `Ywkh3INLQcuPT49f6kcppA` is created. -This API only accepts monotonically increasing versions. - -[source,console] --------------------------------------------------- -PUT /_internal/desired_nodes/Ywkh3INLQcuPT49f6kcppA/100 -{ - "nodes" : [ - { - "settings" : { - "node.name" : "instance-000187", - "node.external_id": "instance-000187", - "node.roles" : ["data_hot", "master"], - "node.attr.data" : "hot", - "node.attr.logical_availability_zone" : "zone-0" - }, - "processors" : 8.0, - "memory" : "58gb", - "storage" : "2tb" - } - ] -} --------------------------------------------------- -// TEST - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "replaced_existing_history_id": false, - "dry_run": false -} --------------------------------------------------- - -Additionally, it is possible to specify a processors range. -This is helpful in environments where Elasticsearch nodes can -be deployed in hosts where the number of processors that the -Elasticsearch process can use is guaranteed to be at least the -lower range and up to the upper range. This is a common scenario -in Linux deployments where cgroups is used. -[source,console] --------------------------------------------------- -PUT /_internal/desired_nodes/Ywkh3INLQcuPT49f6kcppA/101 -{ - "nodes" : [ - { - "settings" : { - "node.name" : "instance-000187", - "node.external_id": "instance-000187", - "node.roles" : ["data_hot", "master"], - "node.attr.data" : "hot", - "node.attr.logical_availability_zone" : "zone-0" - }, - "processors_range" : {"min": 8.0, "max": 10.0}, - "memory" : "58gb", - "storage" : "2tb" - } - ] -} --------------------------------------------------- - -////////////////////////// - -[source,console] --------------------------------------------------- -DELETE /_internal/desired_nodes --------------------------------------------------- -// TEST[continued] - -////////////////////////// diff --git a/docs/reference/cluster/update-settings.asciidoc b/docs/reference/cluster/update-settings.asciidoc deleted file mode 100644 index 9a718ee413e64..0000000000000 --- a/docs/reference/cluster/update-settings.asciidoc +++ /dev/null @@ -1,135 +0,0 @@ -[[cluster-update-settings]] -=== Cluster update settings API -++++ -Cluster update settings -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Configures <>. - -[[cluster-update-settings-api-request]] -==== {api-request-title} - -`PUT /_cluster/settings` - -[[cluster-update-settings-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `manage` -<> to use this API. - -[[cluster-update-settings-api-desc]] -==== {api-description-title} - -:strip-api-link: true -include::{es-ref-dir}/setup/configuration.asciidoc[tag=cluster-setting-precedence] - -[[cluster-update-settings-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=flat-settings] - -`include_defaults`:: - (Optional, Boolean) If `true`, returns all default cluster settings. - Defaults to `false`. 
- -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] - - -[[cluster-update-settings-api-example]] -==== {api-examples-title} - -An example of a persistent update: - -[source,console] --------------------------------------------------- -PUT /_cluster/settings -{ - "persistent" : { - "indices.recovery.max_bytes_per_sec" : "50mb" - } -} --------------------------------------------------- - - -An example of a transient update: - -// tag::transient-settings-warning[] -[WARNING] -==== -We no longer recommend using transient cluster settings. Use persistent cluster -settings instead. If a cluster becomes unstable, transient settings can clear -unexpectedly, resulting in a potentially undesired cluster configuration. -// See the <>. -==== -// end::transient-settings-warning[] - -[source,console] --------------------------------------------------- -PUT /_cluster/settings?flat_settings=true -{ - "transient" : { - "indices.recovery.max_bytes_per_sec" : "20mb" - } -} --------------------------------------------------- - -The response to an update returns the changed setting, as in this response to -the transient example: - -[source,console-result] --------------------------------------------------- -{ - ... - "persistent" : { }, - "transient" : { - "indices.recovery.max_bytes_per_sec" : "20mb" - } -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"acknowledged": true,/] - - -This example resets a setting: - -[source,console] --------------------------------------------------- -PUT /_cluster/settings -{ - "transient" : { - "indices.recovery.max_bytes_per_sec" : null - } -} --------------------------------------------------- - - -The response does not include settings that have been reset: - -[source,console-result] --------------------------------------------------- -{ - ... - "persistent" : {}, - "transient" : {} -} --------------------------------------------------- -// TESTRESPONSE[s/\.\.\./"acknowledged": true,/] - - -You can also reset settings using wildcards. For example, to reset -all dynamic `indices.recovery` settings: - -[source,console] --------------------------------------------------- -PUT /_cluster/settings -{ - "transient" : { - "indices.recovery.*" : null - } -} --------------------------------------------------- diff --git a/docs/reference/cluster/voting-exclusions.asciidoc b/docs/reference/cluster/voting-exclusions.asciidoc deleted file mode 100644 index e60b3be26508d..0000000000000 --- a/docs/reference/cluster/voting-exclusions.asciidoc +++ /dev/null @@ -1,124 +0,0 @@ -[[voting-config-exclusions]] -=== Voting configuration exclusions API -++++ -Voting configuration exclusions -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-cluster[Cluster APIs]. --- - -Adds or removes master-eligible nodes from the -<>. - -[[voting-config-exclusions-api-request]] -==== {api-request-title} - -`POST /_cluster/voting_config_exclusions?node_names=` + - -`POST /_cluster/voting_config_exclusions?node_ids=` + - -`DELETE /_cluster/voting_config_exclusions` - -[[voting-config-exclusions-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `manage` -<> to use this API. - -* If the <> is enabled, only operator -users can use this API. 
- -[[voting-config-exclusions-api-desc]] -==== {api-description-title} - -By default, if there are more than three master-eligible nodes in the cluster -and you remove fewer than half of the master-eligible nodes in the cluster at -once, the <> automatically -shrinks. - -If you want to shrink the voting configuration to contain fewer than three -nodes or to remove half or more of the master-eligible nodes in the cluster at -once, use this API to remove departing nodes from the voting configuration -manually. The API adds an entry for each specified node to the cluster's voting -configuration exclusions list. It then waits until the cluster has reconfigured -its voting configuration to exclude the specified nodes. - -Clusters should have no voting configuration exclusions in normal operation. -Once the excluded nodes have stopped, clear the voting configuration exclusions -with `DELETE /_cluster/voting_config_exclusions`. This API waits for the nodes -to be fully removed from the cluster before it returns. If your cluster has -voting configuration exclusions for nodes that you no longer intend to remove, -use `DELETE /_cluster/voting_config_exclusions?wait_for_removal=false` to clear -the voting configuration exclusions without waiting for the nodes to leave the -cluster. - -A response to `POST /_cluster/voting_config_exclusions` with an HTTP status -code of `200 OK` guarantees that the node has been removed from the voting -configuration and will not be reinstated until the voting configuration -exclusions are cleared by calling `DELETE /_cluster/voting_config_exclusions`. -If the call to `POST /_cluster/voting_config_exclusions` fails or returns a -response with an HTTP status code other than `200 OK` then the node may not -have been removed from the voting configuration. In that case, you may safely -retry the call. - -NOTE: Voting exclusions are required only when you remove at least half of the -master-eligible nodes from a cluster in a short time period. They are not -required when removing master-ineligible nodes or when removing fewer than half -of the master-eligible nodes. - -For more information, see <>. - -[[voting-config-exclusions-api-query-params]] -==== {api-query-parms-title} - -`node_names`:: -A comma-separated list of the names of the nodes to exclude from the voting -configuration. If specified, you may not also specify `?node_ids`. Only applies -to the `POST` form of this API. - -`node_ids`:: -A comma-separated list of the persistent ids of the nodes to exclude from the -voting configuration. If specified, you may not also specify `?node_names`. -Only applies to the `POST` form of this API. - -`timeout`:: -(Optional, <>) When adding a voting configuration -exclusion, the API waits for the specified nodes to be excluded from the voting -configuration before returning. The period of time to wait is specified by the -`?timeout` query parameter. If the timeout expires before the appropriate -condition is satisfied, the request fails and returns an error. Defaults to -`30s`. Only applies to the `POST` form of this API. - -`master_timeout`:: -(Optional, <>) Defines how long to wait while trying to -route the request to the current master node in the cluster. Defaults to `30s`. -Applies to both `POST` and `DELETE` forms of this API. - -`wait_for_removal`:: -(Optional, Boolean) Specifies whether to wait for all excluded nodes to be -removed from the cluster before clearing the voting configuration exclusions -list. 
Defaults to `true`, meaning that all excluded nodes must be removed from -the cluster before this API takes any action. If set to `false` then the voting -configuration exclusions list is cleared even if some excluded nodes are still -in the cluster. Only applies to the `DELETE` form of this API. - -[[voting-config-exclusions-api-example]] -==== {api-examples-title} - -Adds nodes named `nodeName1` and `nodeName2` to the voting configuration -exclusions list: - -[source,console] --------------------------------------------------- -POST /_cluster/voting_config_exclusions?node_names=nodeName1,nodeName2 --------------------------------------------------- - -Remove all exclusions from the list: - -[source,console] --------------------------------------------------- -DELETE /_cluster/voting_config_exclusions --------------------------------------------------- diff --git a/docs/reference/commands/certgen.asciidoc b/docs/reference/commands/certgen.asciidoc deleted file mode 100644 index 98e47f5a624e7..0000000000000 --- a/docs/reference/commands/certgen.asciidoc +++ /dev/null @@ -1,158 +0,0 @@ -[[certgen]] -== elasticsearch-certgen - -deprecated[6.1,"Replaced by <>."] - -The `elasticsearch-certgen` command simplifies the creation of certificate -authorities (CA), certificate signing requests (CSR), and signed certificates -for use with the Elastic Stack. Though this command is deprecated, you do not -need to replace CAs, CSRs, or certificates that it created. - -[discrete] -=== Synopsis - -[source,shell] --------------------------------------------------- -bin/elasticsearch-certgen -(([--cert ] [--days ] [--dn ] [--key ] -[--keysize ] [--pass ] [--p12 ]) -| [--csr]) -[-E ] [-h, --help] [--in ] [--out ] -([-s, --silent] | [-v, --verbose]) --------------------------------------------------- - -[discrete] -=== Description - -By default, the command runs in interactive mode and you are prompted for -information about each instance. An instance is any piece of the Elastic Stack -that requires a Transport Layer Security (TLS) or SSL certificate. Depending on -your configuration, {es}, Logstash, {kib}, and Beats might all require a -certificate and private key. - -The minimum required value for each instance is a name. This can simply be the -hostname, which is used as the Common Name of the certificate. You can also use -a full distinguished name. IP addresses and DNS names are optional. Multiple -values can be specified as a comma separated string. If no IP addresses or DNS -names are provided, you might disable hostname verification in your TLS or SSL -configuration. - -Depending on the parameters that you specify, you are also prompted for -necessary information such as the path for the output file and the CA private -key password. - -The `elasticsearch-certgen` command also supports a silent mode of operation to -enable easier batch operations. For more information, see <>. - -The output file is a zip file that contains the signed certificates and private -keys for each instance. If you chose to generate a CA, which is the default -behavior, the certificate and private key are included in the output file. If -you chose to generate CSRs, you should provide them to your commercial or -organization-specific certificate authority to obtain signed certificates. The -signed certificates must be in PEM format to work with the {stack} -{security-features}. 
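-
-As an illustration, the two invocations below sketch the default mode (which
-generates a CA along with signed certificates and keys) and the CSR-only mode.
-The output file names are placeholders; you can also omit `--out` and enter a
-path when prompted.
-
-[source, sh]
---------------------------------------------------
-# Default mode: produce a zip containing a CA plus signed certificates and keys
-bin/elasticsearch-certgen --out certificate-bundle.zip
-
-# CSR mode: produce certificate signing requests instead of signed certificates
-bin/elasticsearch-certgen --csr --out csr-bundle.zip
---------------------------------------------------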
- -[discrete] -[[certgen-parameters]] -=== Parameters - -`--cert `:: Specifies to generate new instance certificates and keys -using an existing CA certificate, which is provided in the `` argument. -This parameter cannot be used with the `-csr` parameter. - -`--csr`:: Specifies to operate in certificate signing request mode. - -`--days `:: -Specifies an integer value that represents the number of days the generated keys -are valid. The default value is `1095`. This parameter cannot be used with the -`-csr` parameter. - -`--dn `:: -Defines the _Distinguished Name_ that is used for the generated CA certificate. -The default value is `CN=Elastic Certificate Tool Autogenerated CA`. -This parameter cannot be used with the `-csr` parameter. - -`-E `:: Configures a setting. - -`-h, --help`:: Returns all of the command parameters. - -`--in `:: Specifies the file that is used to run in silent mode. The -input file must be a YAML file, as described in <>. - -`--key `:: Specifies the _private-key_ file for the CA certificate. -This parameter is required whenever the `-cert` parameter is used. - -`--keysize `:: -Defines the number of bits that are used in generated RSA keys. The default -value is `2048`. - -`--out `:: Specifies a path for the output file. - -`--pass `:: Specifies the password for the CA private key. -If the `-key` parameter is provided, then this is the password for the existing -private key file. Otherwise, it is the password that should be applied to the -generated CA key. This parameter cannot be used with the `-csr` parameter. - -`--p12 `:: -Generate a PKCS#12 (`.p12` or `.pfx`) container file for each of the instance -certificates and keys. The generated file is protected by the supplied password, -which can be blank. This parameter cannot be used with the `-csr` parameter. - -`-s, --silent`:: Shows minimal output. - -`-v, --verbose`:: Shows verbose output. - -[discrete] -=== Examples - -[discrete] -[[certgen-silent]] -==== Using `elasticsearch-certgen` in Silent Mode - -To use the silent mode of operation, you must create a YAML file that contains -information about the instances. It must match the following format: - -[source, yaml] --------------------------------------------------- -instances: - - name: "node1" <1> - ip: <2> - - "192.0.2.1" - dns: <3> - - "node1.mydomain.com" - - name: "node2" - ip: - - "192.0.2.2" - - "198.51.100.1" - - name: "node3" - - name: "node4" - dns: - - "node4.mydomain.com" - - "node4.internal" - - name: "CN=node5,OU=IT,DC=mydomain,DC=com" - filename: "node5" <4> --------------------------------------------------- -<1> The name of the instance. This can be a simple string value or can be a -Distinguished Name (DN). This is the only required field. -<2> An optional array of strings that represent IP Addresses for this instance. -Both IPv4 and IPv6 values are allowed. The values are added as Subject -Alternative Names. -<3> An optional array of strings that represent DNS names for this instance. -The values are added as Subject Alternative Names. -<4> The filename to use for this instance. This name is used as the name of the -directory that contains the instance's files in the output. It is also used in -the names of the files within the directory. This filename should not have an -extension. Note: If the `name` provided for the instance does not represent a -valid filename, then the `filename` field must be present. 
- -When your YAML file is ready, you can use the `elasticsearch-certgen` command to -generate certificates or certificate signing requests. Simply use the `-in` -parameter to specify the location of the file. For example: - -[source, sh] --------------------------------------------------- -bin/elasticsearch-certgen -in instances.yml --------------------------------------------------- - -This command generates a CA certificate and private key as well as certificates -and private keys for the instances that are listed in the YAML file. diff --git a/docs/reference/commands/certutil.asciidoc b/docs/reference/commands/certutil.asciidoc deleted file mode 100644 index 6720aef470049..0000000000000 --- a/docs/reference/commands/certutil.asciidoc +++ /dev/null @@ -1,322 +0,0 @@ -[[certutil]] -== elasticsearch-certutil - -The `elasticsearch-certutil` command simplifies the creation of certificates for -use with Transport Layer Security (TLS) in the {stack}. - -[discrete] -=== Synopsis - -[source,shell] --------------------------------------------------- -bin/elasticsearch-certutil -( -(ca [--ca-dn ] [--days ] [--pem]) - -| (cert ([--ca ] | [--ca-cert --ca-key ]) -[--ca-dn ] [--ca-pass ] [--days ] -[--dns ] [--in ] [--ip ] -[--multiple] [--name ] [--pem] [--self-signed]) - -| (csr [--dns ] [--in ] [--ip ] -[--name ]) - -[-E ] [--keysize ] [--out ] -[--pass ] -) - -| http - -[-h, --help] ([-s, --silent] | [-v, --verbose]) --------------------------------------------------- - -[discrete] -=== Description - -You can specify one of the following modes: `ca`, `cert`, `csr`, `http`. The -`elasticsearch-certutil` command also supports a silent mode of operation to -enable easier batch operations. - -[discrete] -[[certutil-ca]] -==== CA mode - -The `ca` mode generates a new certificate authority (CA). By default, it -produces a single PKCS#12 output file, which holds the CA certificate and the -private key for the CA. If you specify the `--pem` parameter, the command -generates a zip file, which contains the certificate and private key in PEM -format. - -You can subsequently use these files as input for the `cert` mode of the command. - -[discrete] -[[certutil-cert]] -==== CERT mode - -The `cert` mode generates X.509 certificates and private keys. By default, it -produces a single certificate and key for use on a single instance. - -To generate certificates and keys for multiple instances, specify the -`--multiple` parameter, which prompts you for details about each instance. -Alternatively, you can use the `--in` parameter to specify a YAML file that -contains details about the instances. - -An instance is any piece of the Elastic Stack that requires a TLS or SSL -certificate. Depending on your configuration, {es}, Logstash, {kib}, and Beats -might all require a certificate and private key. The minimum required -information for an instance is its name, which is used as the common name for -the certificate. The instance name can be a hostname value or a full -distinguished name. If the instance name would result in an invalid file or -directory name, you must also specify a file name in the `--name` command -parameter or in the `filename` field in an input YAML file. - -You can optionally provide IP addresses or DNS names for each instance. If -neither IP addresses nor DNS names are specified, the Elastic Stack products -cannot perform hostname verification and you might need to configure the -`verification_mode` security setting to `certificate` only. For more information -about this setting, see <>. 
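-
-For instance, the following sketch (with placeholder host names and addresses)
-supplies both a DNS name and an IP address so that hostname verification can
-stay enabled. It assumes the CA was previously saved as `elastic-stack-ca.p12`,
-the file name used in the examples later in this page.
-
-[source, sh]
---------------------------------------------------
-# Generate one instance certificate with DNS and IP Subject Alternative Names
-bin/elasticsearch-certutil cert --ca elastic-stack-ca.p12 \
-  --name node1 --dns node1.example.com --ip 192.0.2.1
---------------------------------------------------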
- -All certificates that are generated by this command are signed by a CA unless -the `--self-signed` parameter is specified. You must provide your own CA with the -`--ca` or `--ca-cert` and `--ca-key` parameters unless `--self-signed` is specified. -For more information about generating a CA, see the -<>. -To generate self-signed certificates, use the `--self-signed` parameter. - -By default, the `cert` mode produces a single PKCS#12 output file which holds -the instance certificate, the instance private key, and the CA certificate. If -you specify the `--pem` parameter, the command generates PEM formatted -certificates and keys and packages them into a zip file. -If you specify the `--multiple` or `--in` parameters, -the command produces a zip file containing the generated certificates and keys. - -[discrete] -[[certutil-csr]] -==== CSR mode - -The `csr` mode generates certificate signing requests (CSRs) that you can send -to a trusted certificate authority to obtain signed certificates. The signed -certificates must be in PEM or PKCS#12 format to work with {es} -{security-features}. - -By default, the command produces a single CSR for a single instance. - -To generate CSRs for multiple instances, specify the `--multiple` parameter, -which prompts you for details about each instance. Alternatively, you can use -the `--in` parameter to specify a YAML file that contains details about the -instances. - -The `csr` mode produces a single zip file which contains the CSRs and the -private keys for each instance. Each CSR is provided as a standard PEM -encoding of a PKCS#10 CSR. Each key is provided as a PEM encoding of an RSA -private key. - -[discrete] -[[certutil-http]] -==== HTTP mode - -The `http` mode guides you through the process of generating certificates for -use on the HTTP (REST) interface for {es}. It asks you a number of questions in -order to generate the right set of files for your needs. For example, depending -on your choices, it might generate a zip file that contains a certificate -authority (CA), a certificate signing request (CSR), or certificates and keys -for use in {es} and {kib}. Each folder in the zip file contains a readme that -explains how to use the files. - -[discrete] -[[certutil-parameters]] -=== Parameters - -`ca`:: Specifies to generate a new local certificate authority (CA). This -parameter cannot be used with the `csr`, `cert` or `http` parameters. - -`cert`:: Specifies to generate new X.509 certificates and keys. -This parameter cannot be used with the `csr`, `ca` or `http` parameters. - -`csr`:: Specifies to generate certificate signing requests. This parameter -cannot be used with the `ca`, `cert` or `http` parameters. - -`http`:: Generates a new certificate or certificate request for the {es} HTTP -interface. This parameter cannot be used with the `ca`, `cert` or `csr` parameters. - -`--ca `:: Specifies the path to an existing CA key pair -(in PKCS#12 format). This parameter is only applicable to the `cert` parameter. - -`--ca-cert `:: Specifies the path to an existing CA certificate (in -PEM format). You must also specify the `--ca-key` parameter. The `--ca-cert` -parameter is only applicable to the `cert` parameter. - -`--ca-dn `:: Defines the _Distinguished Name_ (DN) that is used for the -generated CA certificate. The default value is -`CN=Elastic Certificate Tool Autogenerated CA`. This parameter cannot be used -with the `csr` or `http` parameters. - -`--ca-key `:: Specifies the path to an existing CA private key (in -PEM format). 
You must also specify the `--ca-cert` parameter. The `--ca-key` -parameter is only applicable to the `cert` parameter. - -`--ca-pass `:: Specifies the password for an existing CA private key -or the generated CA private key. This parameter is only applicable to the `cert` parameter - -`--days `:: Specifies an integer value that represents the number of days the -generated certificates are valid. The default value is `1095`. This parameter -cannot be used with the `csr` or `http` parameters. - -`--dns `:: Specifies a comma-separated list of DNS names. This -parameter cannot be used with the `ca` or `http` parameters. - -`-E `:: Configures a setting. - -`-h, --help`:: Returns all of the command parameters. - -`--in `:: Specifies the file that is used to run in silent mode. The -input file must be a YAML file. This parameter cannot be used with the `ca` or -`http` parameters. - -`--ip `:: Specifies a comma-separated list of IP addresses. This -parameter cannot be used with the `ca` or `http` parameters. - -`--keysize `:: -Defines the number of bits that are used in generated RSA keys. The default -value is `2048`. This parameter cannot be used with the `http` parameter. - -`--multiple`:: -Specifies to generate files for multiple instances. This parameter cannot be -used with the `ca` or `http` parameters. - -`--name `:: -Specifies the name of the generated certificate. This parameter cannot be used -with the `ca` or `http` parameters. - -`--out `:: Specifies a path for the output files. This parameter -cannot be used with the `http` parameter. - -`--pass `:: Specifies the password for the generated private keys. -This parameter cannot be used with the `http` parameters. -+ -Keys stored in PKCS#12 format are always password protected, however, -this password may be _blank_. If you want to specify a blank password -without a prompt, use `--pass ""` (with no `=`) on the command line. -+ -Keys stored in PEM format are password protected only if the -`--pass` parameter is specified. If you do not supply an argument for the -`--pass` parameter, you are prompted for a password. -Encrypted PEM files do not support blank passwords (if you do not -wish to password-protect your PEM keys, then do not specify -`--pass`). - - -`--pem`:: Generates certificates and keys in PEM format instead of PKCS#12. This -parameter cannot be used with the `csr` or `http` parameters. - -`--self-signed`:: Generates self-signed certificates. This parameter is only -applicable to the `cert` parameter. -+ --- -NOTE: This option is not recommended for <>. -In fact, a self-signed certificate should be used only when you can be sure -that a CA is definitely not needed and trust is directly given to the -certificate itself. - --- - -`-s, --silent`:: Shows minimal output. - -`-v, --verbose`:: Shows verbose output. - -[discrete] -=== Examples - -The following command generates a CA certificate and private key in PKCS#12 -format: - -[source, sh] --------------------------------------------------- -bin/elasticsearch-certutil ca --------------------------------------------------- - -You are prompted for an output filename and a password. Alternatively, you can -specify the `--out` and `--pass` parameters. - -You can then generate X.509 certificates and private keys by using the new -CA. 
For example: - -[source, sh] --------------------------------------------------- -bin/elasticsearch-certutil cert --ca elastic-stack-ca.p12 --------------------------------------------------- - -You are prompted for the CA password and for an output filename and password. -Alternatively, you can specify the `--ca-pass`, `--out`, and `--pass` parameters. - -By default, this command generates a file called `elastic-certificates.p12`, -which you can copy to the relevant configuration directory for each Elastic -product that you want to configure. For more information, see -<>. - -[discrete] -[[certutil-silent]] -==== Using `elasticsearch-certutil` in Silent Mode - -To use the silent mode of operation, you must create a YAML file that contains -information about the instances. It must match the following format: - -[source, yaml] --------------------------------------------------- -instances: - - name: "node1" <1> - ip: <2> - - "192.0.2.1" - dns: <3> - - "node1.mydomain.com" - - name: "node2" - ip: - - "192.0.2.2" - - "198.51.100.1" - - name: "node3" - - name: "node4" - dns: - - "node4.mydomain.com" - - "node4.internal" - - name: "CN=node5,OU=IT,DC=mydomain,DC=com" - filename: "node5" <4> --------------------------------------------------- -<1> The name of the instance. This can be a simple string value or can be a -Distinguished Name (DN). This is the only required field. -<2> An optional array of strings that represent IP Addresses for this instance. -Both IPv4 and IPv6 values are allowed. The values are added as Subject -Alternative Names. -<3> An optional array of strings that represent DNS names for this instance. -The values are added as Subject Alternative Names. -<4> The filename to use for this instance. This name is used as the name of the -directory that contains the instance's files in the output. It is also used in -the names of the files within the directory. This filename should not have an -extension. Note: If the `name` provided for the instance does not represent a -valid filename, then the `filename` field must be present. - -When your YAML file is ready, you can use the `elasticsearch-certutil` command -to generate certificates or certificate signing requests. Simply use the `--in` -parameter to specify the location of the file. For example: - -[source, sh] --------------------------------------------------- -bin/elasticsearch-certutil cert --silent --in instances.yml --out test1.zip --pass testpassword --ca elastic-stack-ca.p12 --------------------------------------------------- - -This command generates a compressed `test1.zip` file. After you decompress the -output file, there is a directory for each instance that was listed in the -`instances.yml` file. Each instance directory contains a single PKCS#12 (`.p12`) -file, which contains the instance certificate, instance private key, and CA -certificate. - -You can also use the YAML file to generate certificate signing requests. For -example: - -[source, sh] --------------------------------------------------- -bin/elasticsearch-certutil csr --silent --in instances.yml --out test2.zip --pass testpassword --------------------------------------------------- - -This command generates a compressed file, which contains a directory for each -instance. Each instance directory contains a certificate signing request -(`*.csr` file) and private key (`*.key` file). 
diff --git a/docs/reference/commands/cli-jvm-options.asciidoc b/docs/reference/commands/cli-jvm-options.asciidoc deleted file mode 100644 index 0428ead60b626..0000000000000 --- a/docs/reference/commands/cli-jvm-options.asciidoc +++ /dev/null @@ -1,14 +0,0 @@ -[[cli-tool-jvm-options-{tool-name}]] -[float] -==== JVM options - -CLI tools run with 64MB of heap. For most tools, this value is fine. However, if -needed this can be overridden by setting the `CLI_JAVA_OPTS` environment variable. -For example, the following increases the heap size used by the -`pass:a[elasticsearch-{tool-name}]` tool to 1GB. - -[source,shell,subs=attributes+] --------------------------------------------------- -export CLI_JAVA_OPTS="-Xmx1g" -bin/elasticsearch-{tool-name} ... --------------------------------------------------- diff --git a/docs/reference/commands/create-enrollment-token.asciidoc b/docs/reference/commands/create-enrollment-token.asciidoc deleted file mode 100644 index 646135ff856d7..0000000000000 --- a/docs/reference/commands/create-enrollment-token.asciidoc +++ /dev/null @@ -1,77 +0,0 @@ -[roles="xpack"] -[[create-enrollment-token]] - -== elasticsearch-create-enrollment-token - -The `elasticsearch-create-enrollment-token` command creates enrollment tokens for -{es} nodes and {kib} instances. - -[discrete] -=== Synopsis - -[source,shell] ----- -bin/elasticsearch-create-enrollment-token -[-f, --force] [-h, --help] [-E ] [-s, --scope] [--url] ----- - -[discrete] -=== Description - -NOTE: `elasticsearch-create-enrollment-token` can only be used with {es} clusters -that have been <>. - -Use this command to create enrollment tokens, which you can use to enroll new -{es} nodes to an existing cluster or configure {kib} instances to communicate -with an existing {es} cluster that has security features enabled. -The command generates (and subsequently removes) a temporary user in the -<> to run the request that creates enrollment tokens. - -IMPORTANT: You cannot use this tool if the file realm is disabled in your -`elasticsearch.yml` file. - -This command uses an HTTP connection to connect to the cluster and run the user -management requests. The command automatically attempts to establish the connection -over HTTPS by using the `xpack.security.http.ssl` settings in -the `elasticsearch.yml` file. If you do not use the default configuration directory, -ensure that the `ES_PATH_CONF` environment variable returns the -correct path before you run the `elasticsearch-create-enrollment-token` command. You can -override settings in your `elasticsearch.yml` file by using the `-E` command -option. For more information about debugging connection failures, see -<>. - -[discrete] -[[create-enrollment-token-parameters]] -=== Parameters - -`-E `:: Configures a standard {es} or {xpack} setting. - -`-f, --force`:: Forces the command to run against an unhealthy cluster. - -`-h, --help`:: Returns all of the command parameters. - -`-s, --scope`:: Specifies the scope of the generated token. Supported values are `node` and `kibana`. - -`--url`:: Specifies the base URL (hostname and port of the local node) that the tool uses to submit API -requests to {es}. The default value is determined from the settings in your -`elasticsearch.yml` file. If `xpack.security.http.ssl.enabled` is set to `true`, -you must specify an HTTPS URL. 
- -[discrete] -=== Examples - -The following command creates an enrollment token for enrolling an {es} node into a cluster: - -[source,shell] ----- -bin/elasticsearch-create-enrollment-token -s node ----- - -The following command creates an enrollment token for enrolling a {kib} instance into a cluster. -The specified URL indicates where the elasticsearch-create-enrollment-token tool attempts to reach the -local {es} node: - -[source,shell] ----- -bin/elasticsearch-create-enrollment-token -s kibana --url "https://172.0.0.3:9200" ----- diff --git a/docs/reference/commands/croneval.asciidoc b/docs/reference/commands/croneval.asciidoc deleted file mode 100644 index 80fd143c22a0a..0000000000000 --- a/docs/reference/commands/croneval.asciidoc +++ /dev/null @@ -1,55 +0,0 @@ -[[elasticsearch-croneval]] -== elasticsearch-croneval - -Validates and evaluates a <>. - -[discrete] -=== Synopsis - -[source,shell] --------------------------------------------------- -bin/elasticsearch-croneval -[-c, --count ] [-h, --help] -([-s, --silent] | [-v, --verbose]) --------------------------------------------------- - -[discrete] -=== Description - -This command enables you to verify that your -cron expressions are valid for use with -{es} and produce the expected results. - -This command is provided in the `$ES_HOME/bin` directory. - -[discrete] -[[elasticsearch-croneval-parameters]] -=== Parameters - -`-c, --count` :: - The number of future times this expression will be triggered. The default - value is `10`. - -`-d, --detail`:: - Shows detail for invalid cron expression. It will print the stacktrace if the - expression is not valid. - -`-h, --help`:: - Returns all of the command parameters. - -`-s, --silent`:: - Shows minimal output. - -`-v, --verbose`:: - Shows verbose output. - -[discrete] -=== Example - -If the cron expression is valid, the following command displays the next -20 times that the schedule will be triggered: - -[source,bash] --------------------------------------------------- -bin/elasticsearch-croneval "0 0/1 * * * ?" -c 20 --------------------------------------------------- diff --git a/docs/reference/commands/index.asciidoc b/docs/reference/commands/index.asciidoc deleted file mode 100644 index e55cd89bd6af4..0000000000000 --- a/docs/reference/commands/index.asciidoc +++ /dev/null @@ -1,39 +0,0 @@ -[[commands]] -= Command line tools - -[partintro] --- - -{es} provides the following tools for configuring security and performing other -tasks from the command line: - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - --- - -include::certgen.asciidoc[] -include::certutil.asciidoc[] -include::create-enrollment-token.asciidoc[] -include::croneval.asciidoc[] -include::keystore.asciidoc[] -include::node-tool.asciidoc[] -include::reconfigure-node.asciidoc[] -include::reset-password.asciidoc[] -include::saml-metadata.asciidoc[] -include::service-tokens-command.asciidoc[] -include::setup-passwords.asciidoc[] -include::shard-tool.asciidoc[] -include::syskeygen.asciidoc[] -include::users-command.asciidoc[] diff --git a/docs/reference/commands/keystore.asciidoc b/docs/reference/commands/keystore.asciidoc deleted file mode 100644 index 06e0ea0cf5078..0000000000000 --- a/docs/reference/commands/keystore.asciidoc +++ /dev/null @@ -1,277 +0,0 @@ -[[elasticsearch-keystore]] -== elasticsearch-keystore - -The `elasticsearch-keystore` command manages <> -in the {es} keystore. 
- -[discrete] -[[elasticsearch-keystore-synopsis]] -=== Synopsis - -[source,shell] --------------------------------------------------- -bin/elasticsearch-keystore -( [add ] [-f] [--stdin] -| [add-file ( )+] -| [create] [-p] -| [has-passwd] -| [list] -| [passwd] -| [remove ] -| [show [-o ] ] -| [upgrade] -) [-h, --help] ([-s, --silent] | [-v, --verbose]) --------------------------------------------------- - -[discrete] -[[elasticsearch-keystore-description]] -=== Description - -IMPORTANT: This command should be run as the user that will run {es}. - -Currently, all secure settings are node-specific settings that must have the -same value on every node. Therefore you must run this command on every node. - -When the keystore is password-protected, you must supply the password each time -{es} starts. - -Modifications to the keystore are not automatically applied to the running {es} -node. -Any changes to the keystore will take effect when you restart {es}. -Some secure settings can be explicitly <> -without restart. - -Only some settings are designed to be read from the keystore. However, there -is no validation to block unsupported settings from the keystore and they can -cause {es} to fail to start. To see whether a setting is supported in the -keystore, see the setting reference. - -[discrete] -[[elasticsearch-keystore-parameters]] -=== Parameters - -`add `:: Adds settings to the keystore. Multiple setting names can be -specified as arguments to the `add` command. By default, you are prompted for -the values of the settings. If the keystore is password protected, you are also -prompted to enter the password. If a setting already exists in the keystore, you -must confirm that you want to overwrite the current value. If the keystore does -not exist, you must confirm that you want to create a keystore. To avoid these -two confirmation prompts, use the `-f` parameter. - -`add-file ( )+`:: Adds files to the keystore. - -`create`:: Creates the keystore. - -`-f, --force`:: When used with the `add` parameter, the command no longer prompts you -before overwriting existing entries in the keystore. Also, if you haven't -created a keystore yet, it creates a keystore that is obfuscated but not -password protected. - -`-h, --help`:: Returns all of the command parameters. - -`has-passwd`:: Returns a success message if the keystore exists and is -password-protected. Otherwise, the command fails with exit code 1 and returns an -error message. - -`list`:: Lists the settings in the keystore. If the keystore is password -protected, you are prompted to enter the password. - -`-p`:: When used with the `create` parameter, the command prompts you to enter a -keystore password. If you don't specify the `-p` flag or if you enter an empty -password, the keystore is obfuscated but not password protected. - -`passwd`:: Changes or sets the keystore password. If the keystore is password -protected, you are prompted to enter the current password and the new one. You -can optionally use an empty string to remove the password. If the keystore is -not password protected, you can use this command to set a password. - -`remove `:: Removes settings from the keystore. Multiple setting -names can be specified as arguments to the `remove` command. - -`show `:: Displays the value of a single setting in the keystore. -Pass the `-o` (or `--output`) parameter to write the setting to a file. -If writing to the standard output (the terminal) the setting's value is always -interpreted as a UTF-8 string. 
If the setting contains binary data (for example -for data that was added via the `add-file` command), always use the `-o` option -to write to a file. - -`-s, --silent`:: Shows minimal output. - -`-x, --stdin`:: When used with the `add` parameter, you can pass the settings values -through standard input (stdin). Separate multiple values with carriage returns -or newlines. See <>. - -`upgrade`:: Upgrades the internal format of the keystore. - -`-v, --verbose`:: Shows verbose output. - -[discrete] -[[elasticsearch-keystore-examples]] -=== Examples - -[discrete] -[[creating-keystore]] -==== Create the keystore - -To create the `elasticsearch.keystore`, use the `create` command: - -[source,sh] ----------------------------------------------------------------- -bin/elasticsearch-keystore create -p ----------------------------------------------------------------- - -You are prompted to enter the keystore password. A password-protected -`elasticsearch.keystore` file is created alongside the `elasticsearch.yml` file. - -[discrete] -[[changing-keystore-password]] -==== Change the password of the keystore - -To change the password of the `elasticsearch.keystore`, use the `passwd` command: - -[source,sh] ----------------------------------------------------------------- -bin/elasticsearch-keystore passwd ----------------------------------------------------------------- - -If the {es} keystore is password protected, you are prompted to enter the -current password and then enter the new one. If it is not password protected, -you are prompted to set a password. - -[discrete] -[[list-settings]] -==== List settings in the keystore - -To list the settings in the keystore, use the `list` command. - -[source,sh] ----------------------------------------------------------------- -bin/elasticsearch-keystore list ----------------------------------------------------------------- - -If the {es} keystore is password protected, you are prompted to enter the -password. - -[discrete] -[[add-string-to-keystore]] -==== Add settings to the keystore - -Sensitive string settings, like authentication credentials for Cloud plugins, -can be added with the `add` command: - -[source,sh] ----------------------------------------------------------------- -bin/elasticsearch-keystore add the.setting.name.to.set ----------------------------------------------------------------- - -You are prompted to enter the value of the setting. If the {es} keystore is -password protected, you are also prompted to enter the password. - -You can also add multiple settings with the `add` command: - -[source,sh] ----------------------------------------------------------------- -bin/elasticsearch-keystore add \ - the.setting.name.to.set \ - the.other.setting.name.to.set ----------------------------------------------------------------- - -You are prompted to enter the values of the settings. If the {es} keystore is -password protected, you are also prompted to enter the password. - -To pass the settings values through standard input (stdin), use the `--stdin` -flag: - -[source,sh] ----------------------------------------------------------------- -cat /file/containing/setting/value | bin/elasticsearch-keystore add --stdin the.setting.name.to.set ----------------------------------------------------------------- - -Values for multiple settings must be separated by carriage returns or newlines. 
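-
-As a brief sketch with hypothetical values, a single `add --stdin` call can
-populate both of the settings named above by piping one value per line:
-
-[source,sh]
-----------------------------------------------------------------
-# Two newline-separated values on stdin, one for each listed setting
-printf 'first-value\nsecond-value\n' | bin/elasticsearch-keystore add --stdin \
-  the.setting.name.to.set \
-  the.other.setting.name.to.set
-----------------------------------------------------------------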
- -[discrete] -[[add-file-to-keystore]] -==== Add files to the keystore - -You can add sensitive files, like authentication key files for Cloud plugins, -using the `add-file` command. Settings and file paths are specified in pairs -consisting of `setting path`. The value of the setting will be the binary contents -of the file path at the time the file is added to the keystore. - -[source,sh] ----------------------------------------------------------------- -bin/elasticsearch-keystore add-file the.setting.name.to.set /path/example-file.json ----------------------------------------------------------------- - -You can add multiple files with the `add-file` command: - -[source,sh] ----------------------------------------------------------------- -bin/elasticsearch-keystore add-file \ - the.setting.name.to.set /path/example-file.json \ - the.other.setting.name.to.set /path/other-example-file.json ----------------------------------------------------------------- - -If the {es} keystore is password protected, you are prompted to enter the -password. - -[discrete] -[[show-keystore-value]] -==== Show settings in the keystore - -To display the value of a setting in the keystore use the `show` command: - -[source,sh] ----------------------------------------------------------------- -bin/elasticsearch-keystore show the.name.of.the.setting.to.show ----------------------------------------------------------------- - -If the setting contains binary data you should write it to a file with the -`-o` (or `--output`) option: - -[source,sh] ----------------------------------------------------------------- -bin/elasticsearch-keystore show -o my_file binary.setting.name ----------------------------------------------------------------- - -If the {es} keystore is password protected, you are prompted to enter the -password. - -[discrete] -[[remove-settings]] -==== Remove settings from the keystore - -To remove a setting from the keystore, use the `remove` command: - -[source,sh] ----------------------------------------------------------------- -bin/elasticsearch-keystore remove the.setting.name.to.remove ----------------------------------------------------------------- - -You can also remove multiple settings with the `remove` command: - -[source,sh] ----------------------------------------------------------------- -bin/elasticsearch-keystore remove \ - the.setting.name.to.remove \ - the.other.setting.name.to.remove ----------------------------------------------------------------- - -If the {es} keystore is password protected, you are prompted to enter the -password. - -[discrete] -[[keystore-upgrade]] -==== Upgrade the keystore - -Occasionally, the internal format of the keystore changes. When {es} is -installed from a package manager, an upgrade of the on-disk keystore to the new -format is done during package upgrade. In other cases, {es} performs the upgrade -during node startup. This requires that {es} has write permissions to the -directory that contains the keystore. 
Alternatively, you can manually perform -such an upgrade by using the `upgrade` command: - -[source,sh] ----------------------------------------------------------------- -bin/elasticsearch-keystore upgrade ----------------------------------------------------------------- diff --git a/docs/reference/commands/node-tool.asciidoc b/docs/reference/commands/node-tool.asciidoc deleted file mode 100644 index 265006aa3df17..0000000000000 --- a/docs/reference/commands/node-tool.asciidoc +++ /dev/null @@ -1,650 +0,0 @@ -[[node-tool]] -== elasticsearch-node - -The `elasticsearch-node` command enables you to perform certain unsafe -operations on a node that are only possible while it is shut down. This command -allows you to adjust the <> of a node, unsafely edit cluster -settings and may be able to recover some data after a disaster or start a node -even if it is incompatible with the data on disk. - -[discrete] -=== Synopsis - -[source,shell] --------------------------------------------------- -bin/elasticsearch-node repurpose|unsafe-bootstrap|detach-cluster|override-version - [-E ] - [-h, --help] ([-s, --silent] | [-v, --verbose]) --------------------------------------------------- - -[discrete] -=== Description - -This tool has a number of modes: - -* `elasticsearch-node repurpose` can be used to delete unwanted data from a - node if it used to be a <> or a - <> but has been repurposed not to have one - or other of these roles. - -* `elasticsearch-node remove-settings` can be used to remove persistent settings - from the cluster state in case where it contains incompatible settings that - prevent the cluster from forming. - -* `elasticsearch-node remove-index-settings` can be used to remove index settings - from the cluster state in case where it contains incompatible index settings that - prevent the cluster from forming. - -* `elasticsearch-node remove-customs` can be used to remove custom metadata - from the cluster state in case where it contains broken metadata that - prevents the cluster state from being loaded. - -* `elasticsearch-node unsafe-bootstrap` can be used to perform _unsafe cluster - bootstrapping_. It forces one of the nodes to form a brand-new cluster on - its own, using its local copy of the cluster metadata. - -* `elasticsearch-node detach-cluster` enables you to move nodes from one - cluster to another. This can be used to move nodes into a new cluster - created with the `elasticsearch-node unsafe-bootstrap` command. If unsafe - cluster bootstrapping was not possible, it also enables you to move nodes - into a brand-new cluster. - -* `elasticsearch-node override-version` enables you to start up a node - even if the data in the data path was written by an incompatible version of - {es}. This may sometimes allow you to downgrade to an earlier version of - {es}. - -:tool-name: node -include::cli-jvm-options.asciidoc[] -:!tool-name: - -[[node-tool-repurpose]] -[discrete] -==== Changing the role of a node - -There may be situations where you want to repurpose a node without following -the <>. The `elasticsearch-node -repurpose` tool allows you to delete any excess on-disk data and start a node -after repurposing it. - -The intended use is: - -* Stop the node -* Update `elasticsearch.yml` by setting `node.roles` as desired. 
-* Run `elasticsearch-node repurpose` on the node -* Start the node - -If you run `elasticsearch-node repurpose` on a node without the `data` role and -with the `master` role then it will delete any remaining shard data on that -node, but it will leave the index and cluster metadata alone. If you run -`elasticsearch-node repurpose` on a node without the `data` and `master` roles -then it will delete any remaining shard data and index metadata, but it will -leave the cluster metadata alone. - -[WARNING] -Running this command can lead to data loss for the indices mentioned if the -data contained is not available on other nodes in the cluster. Only run this -tool if you understand and accept the possible consequences, and only after -determining that the node cannot be repurposed cleanly. - -The tool provides a summary of the data to be deleted and asks for confirmation -before making any changes. You can get detailed information about the affected -indices and shards by passing the verbose (`-v`) option. - -[discrete] -==== Removing persistent cluster settings - -There may be situations where a node contains persistent cluster -settings that prevent the cluster from forming. Since the cluster cannot form, -it is not possible to remove these settings using the -<> API. - -The `elasticsearch-node remove-settings` tool allows you to forcefully remove -those persistent settings from the on-disk cluster state. The tool takes a -list of settings as parameters that should be removed, and also supports -wildcard patterns. - -The intended use is: - -* Stop the node -* Run `elasticsearch-node remove-settings name-of-setting-to-remove` on the node -* Repeat for all other master-eligible nodes -* Start the nodes - -[discrete] -==== Removing index settings - -There may be situations where an index contains index settings -that prevent the cluster from forming. Since the cluster cannot form, -it is not possible to remove these settings using the -<> API. - -The `elasticsearch-node remove-index-settings` tool allows you to forcefully remove -those index settings from the on-disk cluster state. The tool takes a -list of index settings as parameters that should be removed, and also supports -wildcard patterns. - -The intended use is: - -* Stop the node -* Run `elasticsearch-node remove-index-settings name-of-index-setting-to-remove` on the node -* Repeat for all nodes -* Start the nodes - -[discrete] -==== Removing custom metadata from the cluster state - -There may be situations where a node contains custom metadata, typically -provided by plugins, that prevent the node from starting up and loading -the cluster from disk. - -The `elasticsearch-node remove-customs` tool allows you to forcefully remove -the problematic custom metadata. The tool takes a list of custom metadata names -as parameters that should be removed, and also supports wildcard patterns. - -The intended use is: - -* Stop the node -* Run `elasticsearch-node remove-customs name-of-custom-to-remove` on the node -* Repeat for all other master-eligible nodes -* Start the nodes - -[discrete] -==== Recovering data after a disaster - -Sometimes {es} nodes are temporarily stopped, perhaps because of the need to -perform some maintenance activity or perhaps because of a hardware failure. -After you resolve the temporary condition and restart the node, -it will rejoin the cluster and continue normally. Depending on your -configuration, your cluster may be able to remain completely available even -while one or more of its nodes are stopped. 
- -Sometimes it might not be possible to restart a node after it has stopped. For -example, the node's host may suffer from a hardware problem that cannot be -repaired. If the cluster is still available then you can start up a fresh node -on another host and {es} will bring this node into the cluster in place of the -failed node. - -Each node stores its data in the data directories defined by the -<>. This means that in a disaster you can -also restart a node by moving its data directories to another host, presuming -that those data directories can be recovered from the faulty host. - -{es} <> in order to elect a master and to update the cluster -state. This means that if you have three master-eligible nodes then the cluster -will remain available even if one of them has failed. However if two of the -three master-eligible nodes fail then the cluster will be unavailable until at -least one of them is restarted. - -In very rare circumstances it may not be possible to restart enough nodes to -restore the cluster's availability. If such a disaster occurs, you should -build a new cluster from a recent snapshot and re-import any data that was -ingested since that snapshot was taken. - -However, if the disaster is serious enough then it may not be possible to -recover from a recent snapshot either. Unfortunately in this case there is no -way forward that does not risk data loss, but it may be possible to use the -`elasticsearch-node` tool to construct a new cluster that contains some of the -data from the failed cluster. - -[[node-tool-override-version]] -[discrete] -==== Bypassing version checks - -The data that {es} writes to disk is designed to be read by the current version -and a limited set of future versions. It cannot generally be read by older -versions, nor by versions that are more than one major version newer. The data -stored on disk includes the version of the node that wrote it, and {es} checks -that it is compatible with this version when starting up. - -In rare circumstances it may be desirable to bypass this check and start up an -{es} node using data that was written by an incompatible version. This may not -work if the format of the stored data has changed, and it is a risky process -because it is possible for the format to change in ways that {es} may -misinterpret, silently leading to data loss. - -To bypass this check, you can use the `elasticsearch-node override-version` -tool to overwrite the version number stored in the data path with the current -version, causing {es} to believe that it is compatible with the on-disk data. - -[[node-tool-unsafe-bootstrap]] -[discrete] -===== Unsafe cluster bootstrapping - -If there is at least one remaining master-eligible node, but it is not possible -to restart a majority of them, then the `elasticsearch-node unsafe-bootstrap` -command will unsafely override the cluster's <> as if performing another -<>. -The target node can then form a new cluster on its own by using -the cluster metadata held locally on the target node. - -[WARNING] -These steps can lead to arbitrary data loss since the target node may not hold the latest cluster -metadata, and this out-of-date metadata may make it impossible to use some or -all of the indices in the cluster. - -Since unsafe bootstrapping forms a new cluster containing a single node, once -you have run it you must use the <> to migrate any other surviving nodes from the failed -cluster into this new cluster. 
- -When you run the `elasticsearch-node unsafe-bootstrap` tool it will analyse the -state of the node and ask for confirmation before taking any action. Before -asking for confirmation it reports the term and version of the cluster state on -the node on which it runs as follows: - -[source,txt] ----- -Current node cluster state (term, version) pair is (4, 12) ----- - -If you have a choice of nodes on which to run this tool then you should choose -one with a term that is as large as possible. If there is more than one -node with the same term, pick the one with the largest version. -This information identifies the node with the freshest cluster state, which minimizes the -quantity of data that might be lost. For example, if the first node reports -`(4, 12)` and a second node reports `(5, 3)`, then the second node is preferred -since its term is larger. However if the second node reports `(3, 17)` then -the first node is preferred since its term is larger. If the second node -reports `(4, 10)` then it has the same term as the first node, but has a -smaller version, so the first node is preferred. - -[WARNING] -Running this command can lead to arbitrary data loss. Only run this tool if you -understand and accept the possible consequences and have exhausted all other -possibilities for recovery of your cluster. - -The sequence of operations for using this tool are as follows: - -1. Make sure you have really lost access to at least half of the -master-eligible nodes in the cluster, and they cannot be repaired or recovered -by moving their data paths to healthy hardware. -2. Stop **all** remaining nodes. -3. Choose one of the remaining master-eligible nodes to become the new elected -master as described above. -4. On this node, run the `elasticsearch-node unsafe-bootstrap` command as shown -below. Verify that the tool reported `Master node was successfully -bootstrapped`. -5. Start this node and verify that it is elected as the master node. -6. Run the <>, described below, on every other node in the cluster. -7. Start all other nodes and verify that each one joins the cluster. -8. Investigate the data in the cluster to discover if any was lost during this -process. - -When you run the tool it will make sure that the node that is being used to -bootstrap the cluster is not running. It is important that all other -master-eligible nodes are also stopped while this tool is running, but the tool -does not check this. - -The message `Master node was successfully bootstrapped` does not mean that -there has been no data loss, it just means that tool was able to complete its -job. - -[[node-tool-detach-cluster]] -[discrete] -===== Detaching nodes from their cluster - -It is unsafe for nodes to move between clusters, because different clusters -have completely different cluster metadata. There is no way to safely merge the -metadata from two clusters together. - -To protect against inadvertently joining the wrong cluster, each cluster -creates a unique identifier, known as the _cluster UUID_, when it first starts -up. Every node records the UUID of its cluster and refuses to join a -cluster with a different UUID. - -However, if a node's cluster has permanently failed then it may be desirable to -try and move it into a new cluster. The `elasticsearch-node detach-cluster` -command lets you detach a node from its cluster by resetting its cluster UUID. -It can then join another cluster with a different UUID. 
- -For example, after unsafe cluster bootstrapping you will need to detach all the -other surviving nodes from their old cluster so they can join the new, -unsafely-bootstrapped cluster. - -Unsafe cluster bootstrapping is only possible if there is at least one -surviving master-eligible node. If there are no remaining master-eligible nodes -then the cluster metadata is completely lost. However, the individual data -nodes also contain a copy of the index metadata corresponding with their shards. This sometimes allows a new cluster to import these shards as -<>. You can sometimes -recover some indices after the loss of all main-eligible nodes in a cluster -by creating a new cluster and then using the `elasticsearch-node -detach-cluster` command to move any surviving nodes into this new cluster. Once the new cluster is fully formed, -use the <> to list, import or delete -any dangling indices. - -There is a risk of data loss when importing a dangling index because data nodes -may not have the most recent copy of the index metadata and do not have any -information about <>. This -means that a stale shard copy may be selected to be the primary, and some of -the shards may be incompatible with the imported mapping. - -[WARNING] -Execution of this command can lead to arbitrary data loss. Only run this tool -if you understand and accept the possible consequences and have exhausted all -other possibilities for recovery of your cluster. - -The sequence of operations for using this tool are as follows: - -1. Make sure you have really lost access to every one of the master-eligible -nodes in the cluster, and they cannot be repaired or recovered by moving their -data paths to healthy hardware. -2. Start a new cluster and verify that it is healthy. This cluster may comprise -one or more brand-new master-eligible nodes, or may be an unsafely-bootstrapped -cluster formed as described above. -3. Stop **all** remaining data nodes. -4. On each data node, run the `elasticsearch-node detach-cluster` tool as shown -below. Verify that the tool reported `Node was successfully detached from the -cluster`. -5. If necessary, configure each data node to -<>. -6. Start each data node and verify that it has joined the new cluster. -7. Wait for all recoveries to have completed, and investigate the data in the -cluster to discover if any was lost during this process. Use the -<> to list, import or delete any -dangling indices. - -The message `Node was successfully detached from the cluster` does not mean -that there has been no data loss, it just means that tool was able to complete -its job. - - -[discrete] -[[node-tool-parameters]] -=== Parameters - -`repurpose`:: Delete excess data when a node's roles are changed. - -`unsafe-bootstrap`:: Specifies to unsafely bootstrap this node as a new -one-node cluster. - -`detach-cluster`:: Specifies to unsafely detach this node from its cluster so -it can join a different cluster. - -`override-version`:: Overwrites the version number stored in the data path so -that a node can start despite being incompatible with the on-disk data. - -`remove-settings`:: Forcefully removes the provided persistent cluster settings -from the on-disk cluster state. - -`-E `:: Configures a setting. - -`-h, --help`:: Returns all of the command parameters. - -`-s, --silent`:: Shows minimal output. - -`-v, --verbose`:: Shows verbose output. 
- -[discrete] -=== Examples - -[discrete] -==== Repurposing a node as a dedicated master node - -In this example, a former data node is repurposed as a dedicated master node. -First update the node's settings to `node.roles: [ "master" ]` in its -`elasticsearch.yml` config file. Then run the `elasticsearch-node repurpose` -command to find and remove excess shard data: - -[source,txt] ----- -node$ ./bin/elasticsearch-node repurpose - - WARNING: Elasticsearch MUST be stopped before running this tool. - -Found 2 shards in 2 indices to clean up -Use -v to see list of paths and indices affected -Node is being re-purposed as master and no-data. Clean-up of shard data will be performed. -Do you want to proceed? -Confirm [y/N] y -Node successfully repurposed to master and no-data. ----- - -[discrete] -==== Repurposing a node as a coordinating-only node - -In this example, a node that previously held data is repurposed as a -coordinating-only node. First update the node's settings to `node.roles: []` in -its `elasticsearch.yml` config file. Then run the `elasticsearch-node repurpose` -command to find and remove excess shard data and index metadata: - - -[source,txt] ----- -node$./bin/elasticsearch-node repurpose - - WARNING: Elasticsearch MUST be stopped before running this tool. - -Found 2 indices (2 shards and 2 index meta data) to clean up -Use -v to see list of paths and indices affected -Node is being re-purposed as no-master and no-data. Clean-up of index data will be performed. -Do you want to proceed? -Confirm [y/N] y -Node successfully repurposed to no-master and no-data. ----- - -[discrete] -==== Removing persistent cluster settings - -If your nodes contain persistent cluster settings that prevent the cluster -from forming, i.e., can't be removed using the <> API, -you can run the following commands to remove one or more cluster settings. - -[source,txt] ----- -node$ ./bin/elasticsearch-node remove-settings xpack.monitoring.exporters.my_exporter.host - - WARNING: Elasticsearch MUST be stopped before running this tool. - -The following settings will be removed: -xpack.monitoring.exporters.my_exporter.host: "10.1.2.3" - -You should only run this tool if you have incompatible settings in the -cluster state that prevent the cluster from forming. -This tool can cause data loss and its use should be your last resort. - -Do you want to proceed? - -Confirm [y/N] y - -Settings were successfully removed from the cluster state ----- - -You can also use wildcards to remove multiple settings, for example using - -[source,txt] ----- -node$ ./bin/elasticsearch-node remove-settings xpack.monitoring.* ----- - -[discrete] -==== Removing index settings - -If your indices contain index settings that prevent the cluster -from forming, you can run the following command to remove one -or more index settings. - -[source,txt] ----- -node$ ./bin/elasticsearch-node remove-index-settings index.my_plugin.foo - - WARNING: Elasticsearch MUST be stopped before running this tool. - -You should only run this tool if you have incompatible index settings in the -cluster state that prevent the cluster from forming. -This tool can cause data loss and its use should be your last resort. - -Do you want to proceed? 
- -Confirm [y/N] y - -Index settings were successfully removed from the cluster state ----- - -You can also use wildcards to remove multiple index settings, for example using - -[source,txt] ----- -node$ ./bin/elasticsearch-node remove-index-settings index.my_plugin.* ----- - -[discrete] -==== Removing custom metadata from the cluster state - -If the on-disk cluster state contains custom metadata that prevents the node -from starting up and loading the cluster state, you can run the following -commands to remove this custom metadata. - -[source,txt] ----- -node$ ./bin/elasticsearch-node remove-customs snapshot_lifecycle - - WARNING: Elasticsearch MUST be stopped before running this tool. - -The following customs will be removed: -snapshot_lifecycle - -You should only run this tool if you have broken custom metadata in the -cluster state that prevents the cluster state from being loaded. -This tool can cause data loss and its use should be your last resort. - -Do you want to proceed? - -Confirm [y/N] y - -Customs were successfully removed from the cluster state ----- - -[discrete] -==== Unsafe cluster bootstrapping - -Suppose your cluster had five master-eligible nodes and you have permanently -lost three of them, leaving two nodes remaining. - -* Run the tool on the first remaining node, but answer `n` at the confirmation - step. - -[source,txt] ----- -node_1$ ./bin/elasticsearch-node unsafe-bootstrap - - WARNING: Elasticsearch MUST be stopped before running this tool. - -Current node cluster state (term, version) pair is (4, 12) - -You should only run this tool if you have permanently lost half or more -of the master-eligible nodes in this cluster, and you cannot restore the -cluster from a snapshot. This tool can cause arbitrary data loss and its -use should be your last resort. If you have multiple surviving master -eligible nodes, you should run this tool on the node with the highest -cluster state (term, version) pair. - -Do you want to proceed? - -Confirm [y/N] n ----- - -* Run the tool on the second remaining node, and again answer `n` at the - confirmation step. - -[source,txt] ----- -node_2$ ./bin/elasticsearch-node unsafe-bootstrap - - WARNING: Elasticsearch MUST be stopped before running this tool. - -Current node cluster state (term, version) pair is (5, 3) - -You should only run this tool if you have permanently lost half or more -of the master-eligible nodes in this cluster, and you cannot restore the -cluster from a snapshot. This tool can cause arbitrary data loss and its -use should be your last resort. If you have multiple surviving master -eligible nodes, you should run this tool on the node with the highest -cluster state (term, version) pair. - -Do you want to proceed? - -Confirm [y/N] n ----- - -* Since the second node has a greater term it has a fresher cluster state, so - it is better to unsafely bootstrap the cluster using this node: - -[source,txt] ----- -node_2$ ./bin/elasticsearch-node unsafe-bootstrap - - WARNING: Elasticsearch MUST be stopped before running this tool. - -Current node cluster state (term, version) pair is (5, 3) - -You should only run this tool if you have permanently lost half or more -of the master-eligible nodes in this cluster, and you cannot restore the -cluster from a snapshot. This tool can cause arbitrary data loss and its -use should be your last resort. If you have multiple surviving master -eligible nodes, you should run this tool on the node with the highest -cluster state (term, version) pair. - -Do you want to proceed? 
- -Confirm [y/N] y -Master node was successfully bootstrapped ----- - -[discrete] -==== Detaching nodes from their cluster - -After unsafely bootstrapping a new cluster, run the `elasticsearch-node -detach-cluster` command to detach all remaining nodes from the failed cluster -so they can join the new cluster: - -[source, txt] ----- -node_3$ ./bin/elasticsearch-node detach-cluster - - WARNING: Elasticsearch MUST be stopped before running this tool. - -You should only run this tool if you have permanently lost all of the -master-eligible nodes in this cluster and you cannot restore the cluster -from a snapshot, or you have already unsafely bootstrapped a new cluster -by running `elasticsearch-node unsafe-bootstrap` on a master-eligible -node that belonged to the same cluster as this node. This tool can cause -arbitrary data loss and its use should be your last resort. - -Do you want to proceed? - -Confirm [y/N] y -Node was successfully detached from the cluster ----- - -[discrete] -==== Bypassing version checks - -Run the `elasticsearch-node override-version` command to overwrite the version -stored in the data path so that a node can start despite being incompatible -with the data stored in the data path: - -[source, txt] ----- -node$ ./bin/elasticsearch-node override-version - - WARNING: Elasticsearch MUST be stopped before running this tool. - -This data path was last written by Elasticsearch version [x.x.x] and may no -longer be compatible with Elasticsearch version [y.y.y]. This tool will bypass -this compatibility check, allowing a version [y.y.y] node to start on this data -path, but a version [y.y.y] node may not be able to read this data or may read -it incorrectly leading to data loss. - -You should not use this tool. Instead, continue to use a version [x.x.x] node -on this data path. If necessary, you can use reindex-from-remote to copy the -data from here into an older cluster. - -Do you want to proceed? - -Confirm [y/N] y -Successfully overwrote this node's metadata to bypass its version compatibility checks. ----- diff --git a/docs/reference/commands/reconfigure-node.asciidoc b/docs/reference/commands/reconfigure-node.asciidoc deleted file mode 100644 index 838de8909bdac..0000000000000 --- a/docs/reference/commands/reconfigure-node.asciidoc +++ /dev/null @@ -1,69 +0,0 @@ -[[reconfigure-node]] -== elasticsearch-reconfigure-node - -The `elasticsearch-reconfigure-node` tool reconfigures an {es} node that was installed -through an RPM or DEB package to join an existing cluster with security features enabled. - -[discrete] -=== Synopsis - -[source,shell] ------------------------------------------------------ -bin/elasticsearch-reconfigure-node -[--enrollment-token] [-h, --help] [-E ] -[-s, --silent] [-v, --verbose] ------------------------------------------------------ - - -[discrete] -=== Description - -When installing {es} with a DEB or RPM package, the current node is assumed to -be the first node in the cluster. {es} enables and configures security -features on the node, generates a password for the `elastic` superuser, and -configures TLS for the HTTP and transport layers. - -Rather than form a single-node cluster, you can add a node to an existing -cluster where security features are already enabled and configured. Before -starting your new node, run the -<> tool -with the `-s node` option to generate an enrollment token on any node in your -existing cluster. On your new node, run the -`elasticsearch-reconfigure-node` tool and pass the enrollment token as a -parameter. 
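-
-For example, the node enrollment token can be generated with the separate
-`elasticsearch-create-enrollment-token` tool, using the `-s node` scope
-mentioned above. The sketch below assumes the default DEB/RPM installation
-layout; adjust the path to match your installation:
-
-[source,shell]
-----
-# Run on any node of the existing, secured cluster. The token is printed to
-# the console and is then passed to elasticsearch-reconfigure-node on the new
-# node, as shown in the example at the end of this page.
-/usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s node
-----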
-
-NOTE: This tool is intended only for use on DEB or RPM distributions of {es}.
-
-You must run this tool with `sudo` so that it can edit the necessary
-files in your {es} installation configuration directory that are owned by
-`root:elasticsearch`.
-
-
-[discrete]
-[[reconfigure-node-parameters]]
-=== Parameters
-
-`--enrollment-token`:: The enrollment token, which can be generated on any of the
-nodes in an existing, secured cluster.
-
-`-E `:: Configures a standard {es} or {xpack} setting.
-
-`-h, --help`:: Shows help information.
-
-`-s, --silent`:: Shows minimal output.
-
-`-v, --verbose`:: Shows verbose output.
-
-
-:tool-name: reconfigure-node
-include::cli-jvm-options.asciidoc[]
-:!tool-name:
-
-[discrete]
-=== Examples
-
-The following example reconfigures an installed {es} node so that it can join an existing cluster when it starts for the first time.
-[source,shell]
-----
-sudo /usr/share/elasticsearch/elasticsearch-reconfigure-node --enrollment-token eyJ2ZXIiOiI4LjAuMCIsImFkciI6WyIxOTIuMTY4LjEuMTY6OTIwMCJdLCJmZ3IiOiI4NGVhYzkyMzAyMWQ1MjcyMmQxNTFhMTQwZmM2ODI5NmE5OWNiNmU0OGVhZjYwYWMxYzljM2I3ZDJjOTg2YTk3Iiwia2V5IjoiUy0yUjFINEJrNlFTMkNEY1dVV1g6QS0wSmJxM3hTRy1haWxoQTdPWVduZyJ9
-----
diff --git a/docs/reference/commands/reset-password.asciidoc b/docs/reference/commands/reset-password.asciidoc
deleted file mode 100644
index b8823158d0d0f..0000000000000
--- a/docs/reference/commands/reset-password.asciidoc
+++ /dev/null
@@ -1,93 +0,0 @@
-[roles="xpack"]
-[[reset-password]]
-== elasticsearch-reset-password
-
-The `elasticsearch-reset-password` command resets the passwords of users in
-the native realm and built-in users.
-
-
-[discrete]
-=== Synopsis
-
-[source,shell]
-----
-bin/elasticsearch-reset-password
-[-a, --auto] [-b, --batch] [-E ]
-[-f, --force] [-h, --help] [-i, --interactive]
-[-s, --silent] [-u, --username] [--url ""] [-v, --verbose]
-----
-
-[discrete]
-=== Description
-
-Use this command to reset the password of a user in the native realm or of a
-built-in user. By default, a strong auto-generated password is printed to the
-console, unless you use the `-i` parameter to enter your own password. The
-command creates (and subsequently removes) a temporary user in the <> realm
-to run the request that changes the user password.
-
-IMPORTANT: You cannot use this tool if the file realm is disabled in your `elasticsearch.yml` file.
-
-This command uses an HTTP connection to connect to the cluster and run the user
-management requests. The command automatically attempts to establish the connection
-over HTTPS by using the `xpack.security.http.ssl` settings in
-the `elasticsearch.yml` file. If you do not use the default configuration directory
-location, ensure that the `ES_PATH_CONF` environment variable returns the
-correct path before you run the `elasticsearch-reset-password` command. You can
-override settings in your `elasticsearch.yml` file by using the `-E` command
-option. For more information about debugging connection failures, see
-<>.
-
-[discrete]
-[[reset-password-parameters]]
-=== Parameters
-
-
-`-a, --auto`:: Resets the password of the specified user to an auto-generated strong password. (Default)
-
-`-b, --batch`:: Runs the reset password process without prompting the user for verification.
-
-`-E `:: Configures a standard {es} or {xpack} setting.
-
-`-f, --force`:: Forces the command to run against an unhealthy cluster.
-
-`-h, --help`:: Returns all of the command parameters.
-
-`-i, --interactive`:: Prompts for the password of the specified user. Use this option to explicitly set a password.
-
-`-s, --silent`:: Shows minimal output in the console.
-
-`-u, --username`:: The username of the native realm user or built-in user.
-
-`--url`:: Specifies the base URL (hostname and port of the local node) that the tool uses to submit API
-requests to {es}. The default value is determined from the settings in your
-`elasticsearch.yml` file.
If `xpack.security.http.ssl.enabled` is set to `true`, -you must specify an HTTPS URL. - -`-v --verbose`:: Shows verbose output in the console. -[discrete] -=== Examples - -The following example resets the password of the `elastic` user to an auto-generated value and -prints the new password in the console: - -[source,shell] ----- -bin/elasticsearch-reset-password -u elastic ----- - -The following example resets the password of a native user with username `user1` after prompting -in the terminal for the desired password: - -[source,shell] ----- -bin/elasticsearch-reset-password --username user1 -i ----- - -The following example resets the password of a native user with username `user2` to an auto-generated value -prints the new password in the console. The specified URL indicates where the elasticsearch-reset-password -tool attempts to reach the local {es} node: -[source,shell] ----- -bin/elasticsearch-reset-password --url "https://172.0.0.3:9200" --username user2 -i ----- diff --git a/docs/reference/commands/saml-metadata.asciidoc b/docs/reference/commands/saml-metadata.asciidoc deleted file mode 100644 index 8c145f541d930..0000000000000 --- a/docs/reference/commands/saml-metadata.asciidoc +++ /dev/null @@ -1,136 +0,0 @@ -[[saml-metadata]] -== elasticsearch-saml-metadata - -The `elasticsearch-saml-metadata` command can be used to generate a SAML 2.0 Service -Provider Metadata file. - -[discrete] -=== Synopsis - -[source,shell] --------------------------------------------------- -bin/elasticsearch-saml-metadata -[--realm ] -[--out ] [--batch] -[--attribute ] [--service-name ] -[--locale ] [--contacts] -([--organisation-name ] [--organisation-display-name ] [--organisation-url ]) -([--signing-bundle ] | [--signing-cert ][--signing-key ]) -[--signing-key-password ] -[-E ] -[-h, --help] ([-s, --silent] | [-v, --verbose]) --------------------------------------------------- - -[discrete] -=== Description - -The SAML 2.0 specification provides a mechanism for Service Providers to -describe their capabilities and configuration using a _metadata file_. - -The `elasticsearch-saml-metadata` command generates such a file, based on the -configuration of a SAML realm in {es}. - -Some SAML Identity Providers will allow you to automatically import a metadata -file when you configure the Elastic Stack as a Service Provider. - -You can optionally select to digitally sign the metadata file in order to -ensure its integrity and authenticity before sharing it with the Identity Provider. -The key used for signing the metadata file need not necessarily be the same as -the keys already used in the saml realm configuration for SAML message signing. - -If your {es} keystore is password protected, you -are prompted to enter the password when you run the -`elasticsearch-saml-metadata` command. - -[discrete] -[[saml-metadata-parameters]] -=== Parameters - -`--attribute `:: Specifies a SAML attribute that should be -included as a `` element in the metadata. Any attribute -configured in the {es} realm is automatically included and does not need to be -specified as a commandline option. - -`--batch`:: Do not prompt for user input. - -`--contacts`:: Specifies that the metadata should include one or more -`` elements. The user will be prompted to enter the details for -each person. - -`-E `:: Configures an {es} setting. - -`-h, --help`:: Returns all of the command parameters. - -`--locale `:: Specifies the locale to use for metadata elements such as -``. Defaults to the JVM's default system locale. 
- -`--organisation-display-name ` element. -Only valid if `--organisation-name` is also specified. - -`--organisation-name `:: Specifies that an `` element should -be included in the metadata and provides the value for the ``. -If this is specified, then `--organisation-url` must also be specified. - -`--organisation-url `:: Specifies the value of the `` -element. This is required if `--organisation-name` is specified. - -`--out `:: Specifies a path for the output files. -Defaults to `saml-elasticsearch-metadata.xml` - -`--service-name `:: Specifies the value for the `` element in -the metadata. Defaults to `elasticsearch`. - -`--signing-bundle `:: Specifies the path to an existing key pair -(in PKCS#12 format). The private key of that key pair will be used to sign -the metadata file. - -`--signing-cert `:: Specifies the path to an existing certificate (in -PEM format) to be used for signing of the metadata file. You must also specify -the `--signing-key` parameter. This parameter cannot be used with the -`--signing-bundle` parameter. - -`--signing-key `:: Specifies the path to an existing key (in PEM format) -to be used for signing of the metadata file. You must also specify the -`--signing-cert` parameter. This parameter cannot be used with the -`--signing-bundle` parameter. - -`--signing-key-password `:: Specifies the password for the signing key. -It can be used with either the `--signing-key` or the `--signing-bundle` parameters. - -`--realm `:: Specifies the name of the realm for which the metadata -should be generated. This parameter is required if there is more than 1 `saml` -realm in your {es} configuration. - -`-s, --silent`:: Shows minimal output. - -`-v, --verbose`:: Shows verbose output. - -[discrete] -=== Examples - -The following command generates a default metadata file for the `saml1` realm: - -[source, sh] --------------------------------------------------- -bin/elasticsearch-saml-metadata --realm saml1 --------------------------------------------------- - -The file will be written to `saml-elasticsearch-metadata.xml`. -You may be prompted to provide the "friendlyName" value for any attributes that -are used by the realm. - -The following command generates a metadata file for the `saml2` realm, with a -`` of `kibana-finance`, a locale of `en-GB` and includes -`` elements and an `` element: - -[source, sh] --------------------------------------------------- -bin/elasticsearch-saml-metadata --realm saml2 \ - --service-name kibana-finance \ - --locale en-GB \ - --contacts \ - --organisation-name "Mega Corp. Finance Team" \ - --organisation-url "http://mega.example.com/finance/" --------------------------------------------------- - diff --git a/docs/reference/commands/service-tokens-command.asciidoc b/docs/reference/commands/service-tokens-command.asciidoc deleted file mode 100644 index 026bdc2aac029..0000000000000 --- a/docs/reference/commands/service-tokens-command.asciidoc +++ /dev/null @@ -1,155 +0,0 @@ -[[service-tokens-command]] -== elasticsearch-service-tokens - -Use the `elasticsearch-service-tokens` command to create, list, and delete file-based service account tokens. - -[discrete] -=== Synopsis - -[source,shell] ----- -bin/elasticsearch-service-tokens -([create ]) | -([list] []) | -([delete ]) ----- - -[discrete] -=== Description - -NOTE: The recommended way to manage <> -is via the <> API. 
-File based tokens are intended for use with orchestrators such as -{ece-ref}[{ece}] and {eck-ref}[{eck}] - -This command creates a `service_tokens` file in the `$ES_HOME/config` directory -when you create the first service account token. This file does not exist by -default. {es} monitors this file for changes and dynamically reloads it. - -This command only makes changes to the `service_tokens` file on the local node. -If the service token will be used to authenticate requests against multiple nodes -in the cluster then you must copy the `service_tokens` file to each node. - -See <> for further information about the -behaviour of service accounts and the management of service tokens. - -IMPORTANT: To ensure that {es} can read the service account token information at -startup, run `elasticsearch-service-tokens` as the same user you use to run -{es}. Running this command as `root` or some other user updates the permissions -for the `service_tokens` file and prevents {es} from accessing it. - -[discrete] -[[service-tokens-command-parameters]] -=== Parameters - -`create`:: -Creates a service account token for the specified service account. -+ -.Properties of `create` -[%collapsible%open] -==== -``::: -(Required, string) Service account principal that takes the format of -`/`, where the `namespace` is a top-level grouping of -service accounts, and `service` is the name of the service. For example, `elastic/fleet-server`. -+ -The service account principal must match a known service account. - -``::: -(Required, string) An identifier for the token name. -+ --- -Token names must be at least 1 and no more than 256 characters. They can contain -alphanumeric characters (`a-z`, `A-Z`, `0-9`), dashes (`-`), and underscores -(`_`), but cannot begin with an underscore. - -NOTE: Token names must be unique in the context of the associated service -account. --- -==== - -`list`:: -Lists all service account tokens defined in the `service_tokens` file. If you -specify a service account principal, the command lists only the tokens that -belong to the specified service account. -+ -.Properties of `list` -[%collapsible%open] -==== -``::: -(Optional, string) Service account principal that takes the format of -`/`, where the `namespace` is a top-level grouping of -service accounts, and `service` is the name of the service. For example, `elastic/fleet-server`. -+ -The service account principal must match a known service account. -==== - -`delete`:: -Deletes a service account token for the specified service account. -+ -.Properties of `delete` -[%collapsible%open] -==== -``::: -(Required, string) Service account principal that takes the format of -`/`, where the `namespace` is a top-level grouping of -service accounts, and `service` is the name of the service. For example, `elastic/fleet-server`. -+ -The service account principal must match a known service account. -==== - -``::: -(Required, string) Name of an existing token. - -[discrete] -=== Examples - -The following command creates a service account token named `my-token` for -the `elastic/fleet-server` service account. - -[source,shell] ----- -bin/elasticsearch-service-tokens create elastic/fleet-server my-token ----- - -The output is a bearer token, which is a Base64 encoded string. - -[source,shell] ----- -SERVICE_TOKEN elastic/fleet-server/my-token = AAEAAWVsYXN0aWM...vZmxlZXQtc2VydmVyL3Rva2VuMTo3TFdaSDZ ----- - -Use this bearer token to authenticate with your {es} cluster. 
- -[source,shell] ----- -curl -H "Authorization: Bearer AAEAAWVsYXN0aWM...vZmxlZXQtc2VydmVyL3Rva2VuMTo3TFdaSDZ" http://localhost:9200/_cluster/health ----- -// NOTCONSOLE - -NOTE: If your node has `xpack.security.http.ssl.enabled` set to `true`, then -you must specify `https` in the request URL. - -The following command lists all service account tokens that are defined in the -`service_tokens` file. - -[source,shell] ----- -bin/elasticsearch-service-tokens list ----- - -A list of all service account tokens displays in your terminal: - -[source,txt] ----- -elastic/fleet-server/my-token -elastic/fleet-server/another-token ----- - -The following command deletes the `my-token` service account token for the -`elastic/fleet-server` service account: - -[source,shell] ----- -bin/elasticsearch-service-tokens delete elastic/fleet-server my-token ----- diff --git a/docs/reference/commands/setup-passwords.asciidoc b/docs/reference/commands/setup-passwords.asciidoc deleted file mode 100644 index c121b67b02a2f..0000000000000 --- a/docs/reference/commands/setup-passwords.asciidoc +++ /dev/null @@ -1,76 +0,0 @@ -[[setup-passwords]] -== elasticsearch-setup-passwords - -deprecated[8.0, "The `elasticsearch-setup-passwords` tool is deprecated and will be removed in a future release. To manually reset the password for the built-in users (including the `elastic` user), use the <> tool, the {es} change password API, or the User Management features in {kib}."] - -The `elasticsearch-setup-passwords` command sets the passwords for the -<>. - -[discrete] -=== Synopsis - -[source,shell] --------------------------------------------------- -bin/elasticsearch-setup-passwords auto|interactive -[-b, --batch] [-h, --help] [-E ] -[-s, --silent] [-u, --url ""] [-v, --verbose] --------------------------------------------------- - -[discrete] -=== Description - -This command is intended for use only during the initial configuration of the -{es} {security-features}. It uses the -<> -to run user management API requests. If your {es} keystore is password protected, -before you can set the passwords for the built-in users, you must enter the keystore password. -After you set a password for the `elastic` -user, the bootstrap password is no longer active and you cannot use this command. -Instead, you can change passwords by using the *Management > Users* UI in {kib} -or the <>. - -This command uses an HTTP connection to connect to the cluster and run the user -management requests. If your cluster uses TLS/SSL on the HTTP layer, the command -automatically attempts to establish the connection by using the HTTPS protocol. -It configures the connection by using the `xpack.security.http.ssl` settings in -the `elasticsearch.yml` file. If you do not use the default config directory -location, ensure that the *ES_PATH_CONF* environment variable returns the -correct path before you run the `elasticsearch-setup-passwords` command. You can -override settings in your `elasticsearch.yml` file by using the `-E` command -option. For more information about debugging connection failures, see -<>. - -[discrete] -[[setup-passwords-parameters]] -=== Parameters - -`auto`:: Outputs randomly-generated passwords to the console. - -`-b, --batch`:: If enabled, runs the change password process without prompting the -user. - -`-E `:: Configures a standard {es} or {xpack} setting. - -`-h, --help`:: Shows help information. - -`interactive`:: Prompts you to manually enter passwords. - -`-s, --silent`:: Shows minimal output. 
- -`-u, --url ""`:: Specifies the URL that the tool uses to submit the user management API -requests. The default value is determined from the settings in your -`elasticsearch.yml` file. If `xpack.security.http.ssl.enabled` is set to `true`, -you must specify an HTTPS URL. - -`-v, --verbose`:: Shows verbose output. - -[discrete] -=== Examples - -The following example uses the `-u` parameter to tell the tool where to submit -its user management API requests: - -[source,shell] --------------------------------------------------- -bin/elasticsearch-setup-passwords auto -u "http://localhost:9201" --------------------------------------------------- diff --git a/docs/reference/commands/shard-tool.asciidoc b/docs/reference/commands/shard-tool.asciidoc deleted file mode 100644 index b1e63740cede0..0000000000000 --- a/docs/reference/commands/shard-tool.asciidoc +++ /dev/null @@ -1,127 +0,0 @@ -[[shard-tool]] -== elasticsearch-shard - -In some cases the Lucene index or translog of a shard copy can become corrupted. -The `elasticsearch-shard` command enables you to remove corrupted parts of the -shard if a good copy of the shard cannot be recovered automatically or restored -from backup. - -[WARNING] -You will lose the corrupted data when you run `elasticsearch-shard`. This tool -should only be used as a last resort if there is no way to recover from another -copy of the shard or restore a snapshot. - -[discrete] -=== Synopsis - -[source,shell] --------------------------------------------------- -bin/elasticsearch-shard remove-corrupted-data - ([--index ] [--shard-id ] | [--dir ]) - [--truncate-clean-translog] - [-E ] - [-h, --help] ([-s, --silent] | [-v, --verbose]) --------------------------------------------------- - -[discrete] -=== Description - -When {es} detects that a shard's data is corrupted, it fails that shard copy and -refuses to use it. Under normal conditions, the shard is automatically recovered -from another copy. If no good copy of the shard is available and you cannot -restore one from a snapshot, you can use `elasticsearch-shard` to remove the -corrupted data and restore access to any remaining data in unaffected segments. - -[WARNING] -Stop Elasticsearch before running `elasticsearch-shard`. - -To remove corrupted shard data use the `remove-corrupted-data` subcommand. - -There are two ways to specify the path: - -* Specify the index name and shard name with the `--index` and `--shard-id` - options. -* Use the `--dir` option to specify the full path to the corrupted index or - translog files. - -:tool-name: shard -include::cli-jvm-options.asciidoc[] -:!tool-name: - -[discrete] -==== Removing corrupted data - -`elasticsearch-shard` analyses the shard copy and provides an overview of the -corruption found. To proceed you must then confirm that you want to remove the -corrupted data. - -[WARNING] -Back up your data before running `elasticsearch-shard`. This is a destructive -operation that removes corrupted data from the shard. - -[source,txt] --------------------------------------------------- -$ bin/elasticsearch-shard remove-corrupted-data --index my-index-000001 --shard-id 0 - - - WARNING: Elasticsearch MUST be stopped before running this tool. - - Please make a complete backup of your index before using this tool. 
- - -Opening Lucene index at /var/lib/elasticsearchdata/indices/P45vf_YQRhqjfwLMUvSqDw/0/index/ - - >> Lucene index is corrupted at /var/lib/elasticsearchdata/indices/P45vf_YQRhqjfwLMUvSqDw/0/index/ - -Opening translog at /var/lib/elasticsearchdata/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/ - - - >> Translog is clean at /var/lib/elasticsearchdata/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/ - - - Corrupted Lucene index segments found - 32 documents will be lost. - - WARNING: YOU WILL LOSE DATA. - -Continue and remove docs from the index ? Y - -WARNING: 1 broken segments (containing 32 documents) detected -Took 0.056 sec total. -Writing... -OK -Wrote new segments file "segments_c" -Marking index with the new history uuid : 0pIBd9VTSOeMfzYT6p0AsA -Changing allocation id V8QXk-QXSZinZMT-NvEq4w to tjm9Ve6uTBewVFAlfUMWjA - -You should run the following command to allocate this shard: - -POST /_cluster/reroute -{ - "commands" : [ - { - "allocate_stale_primary" : { - "index" : "index42", - "shard" : 0, - "node" : "II47uXW2QvqzHBnMcl2o_Q", - "accept_data_loss" : false - } - } - ] -} - -You must accept the possibility of data loss by changing the `accept_data_loss` parameter to `true`. - -Deleted corrupt marker corrupted_FzTSBSuxT7i3Tls_TgwEag from /var/lib/elasticsearchdata/indices/P45vf_YQRhqjfwLMUvSqDw/0/index/ - --------------------------------------------------- - -When you use `elasticsearch-shard` to drop the corrupted data, the shard's -allocation ID changes. After restarting the node, you must use the -<> to tell Elasticsearch to use the new ID. -The `elasticsearch-shard` command shows the request that you need to submit. - -You can also use the `-h` option to get a list of all options and parameters -that the `elasticsearch-shard` tool supports. - -Finally, you can use the `--truncate-clean-translog` option to truncate the -shard's translog even if it does not appear to be corrupt. diff --git a/docs/reference/commands/syskeygen.asciidoc b/docs/reference/commands/syskeygen.asciidoc deleted file mode 100644 index cac0f5edaee4d..0000000000000 --- a/docs/reference/commands/syskeygen.asciidoc +++ /dev/null @@ -1,52 +0,0 @@ -[[syskeygen]] -== elasticsearch-syskeygen - -The `elasticsearch-syskeygen` command creates a system key file in the -elasticsearch config directory. - -[discrete] -=== Synopsis - -[source,shell] --------------------------------------------------- -bin/elasticsearch-syskeygen -[-E ] [-h, --help] -([-s, --silent] | [-v, --verbose]) --------------------------------------------------- - -[discrete] -=== Description - -The command generates a `system_key` file, which you can use to symmetrically -encrypt sensitive data. For example, you can use this key to prevent {watcher} -from returning and storing information that contains clear text credentials. See -<>. - -IMPORTANT: The system key is a symmetric key, so the same key must be used on -every node in the cluster. - -[discrete] -[[syskeygen-parameters]] -=== Parameters - -`-E `:: Configures a setting. For example, if you have a custom -installation of {es}, you can use this parameter to specify the `ES_PATH_CONF` -environment variable. - -`-h, --help`:: Returns all of the command parameters. - -`-s, --silent`:: Shows minimal output. - -`-v, --verbose`:: Shows verbose output. 
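-
-Once you have generated the `system_key` file (see the example below), a
-typical follow-up, sketched here under the assumption that you are using the
-key for {watcher} sensitive-data encryption, is to register the key as a
-secure setting and enable encryption on every node. Adjust the file path to
-wherever the key was generated:
-
-[source,shell]
-----
-# Add the generated key file to the {es} keystore on each node. The path
-# config/system_key assumes the key was written to the default config
-# directory.
-bin/elasticsearch-keystore add-file xpack.watcher.encryption_key config/system_key
-
-# Then set the following in elasticsearch.yml on each node:
-# xpack.watcher.encrypt_sensitive_data: true
-----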
- - -[discrete] -=== Examples - -The following command generates a `system_key` file in the -default `$ES_HOME/config` directory: - -[source, sh] --------------------------------------------------- -bin/elasticsearch-syskeygen --------------------------------------------------- diff --git a/docs/reference/commands/users-command.asciidoc b/docs/reference/commands/users-command.asciidoc deleted file mode 100644 index 7094f288b73d3..0000000000000 --- a/docs/reference/commands/users-command.asciidoc +++ /dev/null @@ -1,136 +0,0 @@ -[[users-command]] -== elasticsearch-users - -If you use file-based user authentication, the `elasticsearch-users` command -enables you to add and remove users, assign user roles, and manage passwords -per node. - -[discrete] -=== Synopsis - -[source,shell] --------------------------------------------------- -bin/elasticsearch-users -([useradd ] [-p ] [-r ]) | -([list] ) | -([passwd ] [-p ]) | -([roles ] [-a ] [-r ]) | -([userdel ]) --------------------------------------------------- - -[discrete] -=== Description - -If you use the built-in `file` internal realm, users are defined in local files -on each node in the cluster. - -Usernames and roles must be at least 1 and no more than 1024 characters. They -can contain alphanumeric characters (`a-z`, `A-Z`, `0-9`), spaces, punctuation, -and printable symbols in the -{wikipedia}/Basic_Latin_(Unicode_block)[Basic Latin (ASCII) block]. -Leading or trailing whitespace is not allowed. - -Passwords must be at least 6 characters long. - -For more information, see <>. - -TIP: To ensure that {es} can read the user and role information at startup, run -`elasticsearch-users useradd` as the same user you use to run {es}. Running the -command as root or some other user updates the permissions for the `users` and -`users_roles` files and prevents {es} from accessing them. - -[discrete] -[[users-command-parameters]] -=== Parameters - -`-a `:: If used with the `roles` parameter, adds a comma-separated list -of roles to a user. - -//`-h, --help`:: Returns all of the command parameters. - -`list`:: List the users that are registered with the `file` realm -on the local node. If you also specify a user name, the command provides -information for that user. - -`-p `:: Specifies the user's password. If you do not specify this -parameter, the command prompts you for the password. -+ --- -TIP: Omit the `-p` option to keep -plaintext passwords out of the terminal session's command history. - --- - -`passwd `:: Resets a user's password. You can specify the new -password directly with the `-p` parameter. - -`-r `:: -* If used with the `useradd` parameter, defines a user's roles. This option -accepts a comma-separated list of role names to assign to the user. -* If used with the `roles` parameter, removes a comma-separated list of roles -from a user. - -`roles`:: Manages the roles of a particular user. You can combine adding and -removing roles within the same command to change a user's roles. - -//`-s, --silent`:: Shows minimal output. - -`useradd `:: Adds a user to your local node. - -`userdel `:: Deletes a user from your local node. - -//`-v, --verbose`:: Shows verbose output. - -//[discrete] -//=== Authorization - -[discrete] -=== Examples - -The following example adds a new user named `jacknich` to the `file` realm. The -password for this user is `theshining`, and this user is associated with the -`network` and `monitoring` roles. 
- -[source,shell] -------------------------------------------------------------------- -bin/elasticsearch-users useradd jacknich -p theshining -r network,monitoring -------------------------------------------------------------------- - -The following example lists the users that are registered with the `file` realm -on the local node: - -[source, shell] ----------------------------------- -bin/elasticsearch-users list -rdeniro : admin -alpacino : power_user -jacknich : monitoring,network ----------------------------------- - -Users are in the left-hand column and their corresponding roles are listed in -the right-hand column. - -The following example resets the `jacknich` user's password: - -[source,shell] --------------------------------------------------- -bin/elasticsearch-users passwd jachnich --------------------------------------------------- - -Since the `-p` parameter was omitted, the command prompts you to enter and -confirm a password in interactive mode. - -The following example removes the `network` and `monitoring` roles from the -`jacknich` user and adds the `user` role: - -[source,shell] ------------------------------------------------------------- -bin/elasticsearch-users roles jacknich -r network,monitoring -a user ------------------------------------------------------------- - -The following example deletes the `jacknich` user: - -[source,shell] --------------------------------------------------- -bin/elasticsearch-users userdel jacknich --------------------------------------------------- diff --git a/docs/reference/community-contributed.md b/docs/reference/community-contributed.md new file mode 100644 index 0000000000000..4978f22bebfcf --- /dev/null +++ b/docs/reference/community-contributed.md @@ -0,0 +1,157 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/client/community/current/index.html +--- + +# Community-contributed clients + +:::{note} +This is a list of clients submitted by members of the Elastic community. Elastic does not support or endorse these clients. + +If you'd like to add a new client to this list, please [open a pull request](https://github.com/elastic/elasticsearch/blob/main/CONTRIBUTING.md). +::: + +Besides the [officially supported Elasticsearch clients](docs-content://reference/elasticsearch/clients/index.md), there are +a number of clients that have been contributed by the community for various languages. + +## B4J [b4j] + +* [jElasticsearch](https://www.b4x.com/android/forum/threads/server-jelasticsearch-search-and-text-analytics.73335/): B4J client based on the official Java REST client. **Last release more than a year ago** + +## C++ [cpp] + +* [elasticlient](https://github.com/seznam/elasticlient): simple library for simplified work with Elasticsearch in C++. **Last commit more than a year ago** + +## Clojure [clojure] + +* [Spandex](https://github.com/mpenet/spandex): Clojure client, based on the new official low-level REST client. + +* [Elastisch](https://github.com/clojurewerkz/elastisch): Clojure client. **Last commit more than a year ago** + +## ColdFusion (CFML) [coldfusion] + +* [cbElasticSearch](https://www.forgebox.io/view/cbelasticsearch): Native ColdFusion (CFML) support for the ColdBox MVC Platform which provides you with a fluent search interface for Elasticsearch, in addition to a CacheBox Cache provider and a Logbox Appender for logging. + +## Erlang [erlang] + +* [erlastic_search](https://github.com/tsloughter/erlastic_search): Erlang client using HTTP. 
**Last commit more than a year ago** + +* [Tirexs](https://github.com/datahogs/tirexs): An [Elixir](https://github.com/elixir-lang/elixir) based API/DSL, inspired by [Tire](https://github.com/karmi/tire). Ready to use in pure Erlang environment. **Last commit more than a year ago** + +* [Elixir Bulk Processor](https://github.com/sashman/elasticsearch_elixir_bulk_processor): Dynamically configurable Elixir port of the [Bulk Processor](https://www.elastic.co/guide/en/elasticsearch/client/java-api/current/java-docs-bulk-processor.html). Implemented using GenStages to handle back pressure. **Last commit more than a year ago** + +## Go [go] + +Also see the [official Elasticsearch Go client](go-elasticsearch://docs/reference/index.md). + +* [elastigo](https://github.com/mattbaird/elastigo): Go client. **Last commit more than a year ago** + +* [elastic](https://github.com/olivere/elastic): Elasticsearch client for Google Go. **Last commit more than a year ago** + +* [elk](https://github.com/softctrl/elk): Golang lib for Elasticsearch client. **Last commit more than a year ago** + +## Haskell [haskell] + +* [bloodhound](https://github.com/bitemyapp/bloodhound): Haskell client and DSL. + +## Java [java] + +Also see the [official Elasticsearch Java client](elasticsearch-java://docs/reference/index.md). + +* [Flummi](https://github.com/otto-de/flummi): Java Rest client with comprehensive Query DSL API. + +* [Jest](https://github.com/searchbox-io/Jest): Java Rest client. **No longer maintained** + +## JavaScript [javascript] + +See the [official Elasticsearch JavaScript client](elasticsearch-js://docs/reference/index.md). + +## Julia [julia] + +* [ElasticsearchClient.jl](https://github.com/OpenSesame/ElasticsearchClient.jl): Elasticsearch client inspired by the [official Elasticsearch Ruby client](elasticsearch-ruby://docs/reference/index.md). + +## Kotlin [kotlin] + +* [ES Kotlin](https://github.com/mbuhot/eskotlin): Elasticsearch Query DSL for kotlin based on the [official Elasticsearch Java client](elasticsearch-java://docs/reference/index.md). **Last commit more than a year ago** + +* [ES Kotlin Wrapper Client](https://github.com/jillesvangurp/es-kotlin-wrapper-client): Kotlin extension functions and abstractions for the [official Elasticsearch high-level client](https://www.elastic.co/guide/en/elasticsearch/client/java-api/current/index.html). Aims to reduce the amount of boilerplate needed to do searches, bulk indexing and other common things users do with the client. **No longer maintained** + +## Lua [lua] + +* [elasticsearch-lua](https://github.com/DhavalKapil/elasticsearch-lua): Lua client for Elasticsearch **Last commit more than a year ago** + +## .NET [dotnet] + +See the [official Elasticsearch .NET client](elasticsearch-net://docs/reference/index.md). + +## Perl [perl] + +Also see the [official Elasticsearch Perl client](https://www.elastic.co/guide/en/elasticsearch/client/perl-api/current/index.html). + +* [Elastijk](https://metacpan.org/pod/Elastijk): A low-level, minimal HTTP client. **Last commit more than a year ago** + +## PHP [php] + +Also see the [official Elasticsearch PHP client](elasticsearch-php://docs/reference/index.md). + +* [Elastica](https://github.com/ruflin/Elastica): PHP client. + +* [elasticsearch](https://github.com/nervetattoo/elasticsearch): PHP client. 
**Last commit more than a year ago** + +* [elasticsearcher](https://github.com/madewithlove/elasticsearcher): Agnostic lightweight package on top of the Elasticsearch PHP client. Its main goal is to allow for easier structuring of queries and indices in your application. It does not want to hide or replace functionality of the Elasticsearch PHP client. **Last commit more than a year ago** + +## Python [python] + +See the [official Elasticsearch Python client](elasticsearch-py://docs/reference/index.md). + +## R [r] + +* [elastic](https://github.com/ropensci/elastic): A low-level R client for Elasticsearch. + +* [elasticdsl](https://github.com/ropensci/elasticdsl): A high-level R DSL for Elasticsearch, wrapping the elastic R client. **No longer maintained** + +* [uptasticsearch](https://github.com/uptake/uptasticsearch): An R client tailored to data science workflows. **Last commit more than a year ago** + +## Ruby [ruby] + +Also see the [official Elasticsearch Ruby client](elasticsearch-ruby://docs/reference/index.md). + +* [chewy](https://github.com/toptal/chewy): An ODM and wrapper for the official Elasticsearch client. + +* [Searchkick](https://github.com/ankane/searchkick): Intelligent search made easy. + +* [Estella](https://github.com/artsy/estella): Make your Ruby models searchable. + +* [SearchFlip](https://github.com/mrkamel/search_flip): Full-featured Elasticsearch Ruby Client with a Chainable DSL. + +* [elastics](https://github.com/printercu/elastics-rb): Tiny client with built-in zero-downtime migrations and ActiveRecord integration. **Last commit more than a year ago** + +## Rust [rust] + +Also see the [official Elasticsearch Rust client](elasticsearch-rs://docs/reference/index.md). + +* [rs-es](https://github.com/benashford/rs-es): A REST API client with a strongly-typed Query DSL. **Last commit more than a year ago** + +* [elastic](https://github.com/elastic-rs/elastic): A modular REST API client that supports freeform queries. **Last commit more than a year ago** + +## Scala [scala] + +* [elastic4s](https://github.com/sksamuel/elastic4s): Scala DSL. + +* [wabisabi](https://github.com/gphat/wabisabi): Asynchronous REST API Scala client. **No longer maintained** + +* [escalar](https://github.com/workday/escalar): Type-safe Scala wrapper for the REST API. **Last commit more than a year ago** + +* [elasticsearch-client](https://github.com/SumoLogic/elasticsearch-client): Scala DSL that uses the REST API. Akka and AWS helpers included.**No longer maintained** + +## Smalltalk [smalltalk] + +* [elasticsearch-smalltalk](https://github.com/newapplesho/elasticsearch-smalltalk): Pharo Smalltalk client for Elasticsearch. **Last commit more than a year ago** + +## Swift [swift] + +* [Elasticsearch NIO Client](https://github.com/brokenhandsio/elasticsearch-nio-client): a library for working with Elasticsearch in Swift, built on top of SwiftNIO and Swift Package Manager. 
+ +## Vert.x [vertx] + +* [elasticsearch-client](https://github.com/reactiverse/elasticsearch-client): An Elasticsearch client for Eclipse Vert.x **Last commit more than a year ago** diff --git a/docs/reference/connector/apis/cancel-connector-sync-job-api.asciidoc b/docs/reference/connector/apis/cancel-connector-sync-job-api.asciidoc deleted file mode 100644 index d4db8ce62bc46..0000000000000 --- a/docs/reference/connector/apis/cancel-connector-sync-job-api.asciidoc +++ /dev/null @@ -1,60 +0,0 @@ -[[cancel-connector-sync-job-api]] -=== Cancel connector sync job API -++++ -Cancel connector sync job -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Cancels a connector sync job. - -To get started with Connector APIs, check out <>. - - -[[cancel-connector-sync-job-api-request]] -==== {api-request-title} -`PUT _connector/_sync_job//_cancel` - -[[cancel-connector-sync-job-api-prereqs]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_sync_job_id` parameter should reference an existing connector sync job. - -[[cancel-connector-sync-job-api-desc]] -==== {api-description-title} - -Cancels a connector sync job, which sets the `status` to `cancelling` and updates `cancellation_requested_at` to the current time. -The connector service is then responsible for setting the `status` of connector sync jobs to `cancelled`. - -[[cancel-connector-sync-job-api-path-params]] -==== {api-path-parms-title} - -`connector_sync_job_id`:: -(Required, string) - -[[cancel-connector-sync-job-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector sync job cancellation was successfully requested. - -`404`:: -No connector sync job matching `connector_sync_job_id` could be found. - -[[cancel-connector-sync-job-api-example]] -==== {api-examples-title} - -The following example cancels the connector sync job with ID `my-connector-sync-job-id`: - -[source,console] ----- -PUT _connector/_sync_job/my-connector-sync-job-id/_cancel ----- -// TEST[skip:there's no way to clean up after creating a connector sync job, as we don't know the id ahead of time. Therefore, skip this test.] diff --git a/docs/reference/connector/apis/check-in-connector-api.asciidoc b/docs/reference/connector/apis/check-in-connector-api.asciidoc deleted file mode 100644 index be7521e937316..0000000000000 --- a/docs/reference/connector/apis/check-in-connector-api.asciidoc +++ /dev/null @@ -1,84 +0,0 @@ -[[check-in-connector-api]] -=== Check in connector API -++++ -Check in a connector -++++ - -preview::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Updates the `last_seen` field of a connector with current timestamp. - -To get started with Connector APIs, check out <>. - - -[[check-in-connector-api-request]] -==== {api-request-title} - -`PUT _connector//_check_in` - -[[check-in-connector-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_id` parameter should reference an existing connector. 
- -[[check-in-connector-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - - -[[check-in-connector-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector `last_seen` field was successfully updated with a current timestamp. - -`400`:: -The `connector_id` was not provided. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[check-in-connector-api-example]] -==== {api-examples-title} - -The following example updates the `last_seen` property with current timestamp for the connector with ID `my-connector`: - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "service_type": "google_drive" -} --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -PUT _connector/my-connector/_check_in ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- diff --git a/docs/reference/connector/apis/check-in-connector-sync-job-api.asciidoc b/docs/reference/connector/apis/check-in-connector-sync-job-api.asciidoc deleted file mode 100644 index 5f9b584621c2f..0000000000000 --- a/docs/reference/connector/apis/check-in-connector-sync-job-api.asciidoc +++ /dev/null @@ -1,59 +0,0 @@ -[[check-in-connector-sync-job-api]] -=== Check in connector sync job API -++++ -Check in connector sync job -++++ - -preview::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Checks in a connector sync job (updates `last_seen` to the current time). - -To get started with Connector APIs, check out <>. - - -[[check-in-connector-sync-job-api-request]] -==== {api-request-title} -`PUT _connector/_sync_job//_check_in/` - -[[check-in-connector-sync-job-api-prereqs]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_sync_job_id` parameter should reference an existing connector sync job. - -[[check-in-connector-sync-job-api-desc]] -==== {api-description-title} - -Checks in a connector sync job and sets `last_seen` to the time right before updating it in the internal index. - -[[check-in-connector-sync-job-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[[check-in-connector-sync-job-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector sync job was successfully checked in. - -`404`:: -No connector sync job matching `connector_sync_job_id` could be found. - -[[check-in-connector-sync-job-api-example]] -==== {api-examples-title} - -The following example checks in the connector sync job `my-connector-sync-job`: - -[source,console] ----- -PUT _connector/_sync_job/my-connector-sync-job/_check_in ----- -// TEST[skip:there's no way to clean up after creating a connector sync job, as we don't know the id ahead of time. Therefore, skip this test.] 
diff --git a/docs/reference/connector/apis/claim-connector-sync-job-api.asciidoc b/docs/reference/connector/apis/claim-connector-sync-job-api.asciidoc deleted file mode 100644 index 565a39c2083af..0000000000000 --- a/docs/reference/connector/apis/claim-connector-sync-job-api.asciidoc +++ /dev/null @@ -1,75 +0,0 @@ -[[claim-connector-sync-job-api]] -=== Claim connector sync job API -++++ -Claim connector sync job -++++ - -preview::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Claims a connector sync job. - -The `_claim` endpoint is not intended for direct connector management by users. It is there to support the implementation of services that utilize the https://github.com/elastic/connectors/blob/main/docs/CONNECTOR_PROTOCOL.md[Connector Protocol] to communicate with {es}. - -To get started with Connector APIs, check out <>. - - -[[claim-connector-sync-job-api-request]] -==== {api-request-title} -`PUT _connector/_sync_job//_claim` - -[[claim-connector-sync-job-api-prereqs]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_sync_job_id` parameter should reference an existing connector sync job. - -[[claim-connector-sync-job-api-desc]] -==== {api-description-title} - -Claims a connector sync job. This action updates the job's status to `in_progress` and sets the `last_seen` and `started_at` timestamps to the current time. Additionally, it can set the `sync_cursor` property for the sync job. - -[[claim-connector-sync-job-api-path-params]] -==== {api-path-parms-title} - -`connector_sync_job_id`:: -(Required, string) - -[role="child_attributes"] -[[claim-connector-sync-job-api-request-body]] -==== {api-request-body-title} - -`worker_hostname`:: -(Required, string) The host name of the current system that will execute the job. - -`sync_cursor`:: -(Optional, Object) The cursor object from the last incremental sync job. This should reference the `sync_cursor` field in the connector state for which the job is executed. - - -[[claim-connector-sync-job-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector sync job was successfully claimed. - -`404`:: -No connector sync job matching `connector_sync_job_id` could be found. - -[[claim-connector-sync-job-api-example]] -==== {api-examples-title} - -The following example claims the connector sync job with ID `my-connector-sync-job-id`: - -[source,console] ----- -PUT _connector/_sync_job/my-connector-sync-job-id/_claim -{ - "worker_hostname": "some-machine" -} ----- -// TEST[skip:there's no way to clean up after creating a connector sync job, as we don't know the id ahead of time. Therefore, skip this test.] diff --git a/docs/reference/connector/apis/connector-apis.asciidoc b/docs/reference/connector/apis/connector-apis.asciidoc deleted file mode 100644 index 719db5a315714..0000000000000 --- a/docs/reference/connector/apis/connector-apis.asciidoc +++ /dev/null @@ -1,156 +0,0 @@ -[[connector-apis]] -== Connector APIs - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -beta::[] - -The connector and sync jobs APIs provide a convenient way to create and manage Elastic <>. - and sync jobs in an internal index. To get started with Connector APIs, check out <>. 
- - -Connectors are {es} integrations that bring content from third-party data sources, which can be deployed on {ecloud} or hosted on your own infrastructure: - -* *Managed connectors* are a managed service on {ecloud} -* *Self-managed connectors* are self-hosted on your infrastructure - -Find a list of all supported service types in the <>. - -This API provides an alternative to relying solely on {kib} UI for connector and sync job management. The API comes with a set of -validations and assertions to ensure that the state representation in the internal index remains valid. - -[TIP] -==== -We also have a command-line interface for Elastic connectors. Learn more in the https://github.com/elastic/connectors/blob/main/docs/CLI.md[elastic/connectors] repository. -==== - -[discrete] -[[elastic-connector-apis]] -=== Connector APIs - -You can use these APIs to create, get, delete and update connectors. - -Use the following APIs to manage connectors: - -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] - - -[discrete] -[[sync-job-apis]] -=== Sync Job APIs - -You can use these APIs to create, cancel, delete and update sync jobs. - -Use the following APIs to manage sync jobs: - -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] -* <> -beta:[] - - -[discrete] -[[service-apis]] -=== Service APIs - -preview::[] - -*Connector Service APIs* are a subset of Connector API endpoints, that represent framework-level operations defined in the https://github.com/elastic/connectors/blob/main/docs/CONNECTOR_PROTOCOL.md[Connector Protocol]. These APIs are not intended for direct connector management by users but are there to support the implementation of services that utilize the Connector Protocol to communicate with {es}. - -[TIP] -==== -All Elastic connectors are built using our Python connector framework. The source code is available in the https://github.com/elastic/connectors[elastic/connectors] repository on GitHub. 
-==== - -[discrete] -[[connector-service-apis]] -==== Connector Service APIs - -* <> -preview:[] -* <> -preview:[] -* <> -preview:[] -* <> -preview:[] - -[discrete] -[[sync-job-service-apis]] -==== Sync Job Service APIs - -* <> -preview:[] -* <> -preview:[] -* <> -preview:[] -* <> -preview:[] - - - -include::create-connector-api.asciidoc[] -include::delete-connector-api.asciidoc[] -include::get-connector-api.asciidoc[] -include::list-connectors-api.asciidoc[] -include::update-connector-api-key-id-api.asciidoc[] -include::update-connector-configuration-api.asciidoc[] -include::update-connector-index-name-api.asciidoc[] -include::update-connector-features-api.asciidoc[] -include::update-connector-filtering-api.asciidoc[] -include::update-connector-name-description-api.asciidoc[] -include::update-connector-pipeline-api.asciidoc[] -include::update-connector-scheduling-api.asciidoc[] -include::update-connector-service-type-api.asciidoc[] - -include::create-connector-sync-job-api.asciidoc[] -include::cancel-connector-sync-job-api.asciidoc[] -include::delete-connector-sync-job-api.asciidoc[] -include::get-connector-sync-job-api.asciidoc[] -include::list-connector-sync-jobs-api.asciidoc[] - -include::check-in-connector-api.asciidoc[] -include::update-connector-error-api.asciidoc[] -include::update-connector-last-sync-api.asciidoc[] -include::update-connector-status-api.asciidoc[] - -include::check-in-connector-sync-job-api.asciidoc[] -include::claim-connector-sync-job-api.asciidoc[] -include::set-connector-sync-job-error-api.asciidoc[] -include::set-connector-sync-job-stats-api.asciidoc[] diff --git a/docs/reference/connector/apis/create-connector-api.asciidoc b/docs/reference/connector/apis/create-connector-api.asciidoc deleted file mode 100644 index 3300ce270c345..0000000000000 --- a/docs/reference/connector/apis/create-connector-api.asciidoc +++ /dev/null @@ -1,145 +0,0 @@ -[[create-connector-api]] -=== Create connector API -++++ -Create connector -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Creates an Elastic connector. -Connectors are {es} integrations that bring content from third-party data sources, which can be deployed on {ecloud} or hosted on your own infrastructure: - -* *Managed connectors* are a managed service on {ecloud} -* *Self-managed connectors* are self-hosted on your infrastructure - -Find a list of all supported service types in the <>. - -To get started with Connector APIs, check out <>. - - -[source,console] --------------------------------------------------- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "service_type": "google_drive" -} --------------------------------------------------- -//// -[source,console] ----- -DELETE _connector/my-connector ----- -// TEST[continued] -//// - -[[create-connector-api-request]] -==== {api-request-title} -* `POST _connector` - -* `PUT _connector/` - - -[[create-connector-api-prereqs]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `service_type` parameter should reference a supported third-party service. See the available service types for <> and <> connectors. This can also reference the service type of your custom connector. 
- - -[[create-connector-api-desc]] -==== {api-description-title} - -Creates a connector document in the internal index and initializes its configuration, filtering, and scheduling with default values. These values can be updated later as needed. - -[[create-connector-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Unique identifier of a connector. - - -[role="child_attributes"] -[[create-connector-api-request-body]] -==== {api-request-body-title} - -`description`:: -(Optional, string) The description of the connector. - -`index_name`:: -(Optional, string) The target index to sync data. If the index doesn't exist, it will be created upon the first sync. - -`name`:: -(Optional, string) The name of the connector. Setting the connector name is recommended when managing connectors in {kib}. - -`is_native`:: -(Optional, boolean) Indicates if it's a managed connector. Defaults to `false`. - -`language`:: -(Optional, string) Language analyzer for the data. Limited to supported languages. - -`service_type`:: -(Optional, string) Connector service type. Can reference Elastic-supported third-party services or a custom connector type. See the available service types for <> and <> connectors. - - -[role="child_attributes"] -[[create-connector-api-response-body]] -==== {api-response-body-title} - -`id`:: - (string) The ID associated with the connector document. Returned when using a POST request. - -`result`:: - (string) The result of the indexing operation, `created` or `updated`. Returned when using a PUT request. - -[[create-connector-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Indicates that an existing connector was updated successfully. - -`201`:: -Indicates that the connector was created successfully. - -`400`:: -Indicates that the request was malformed. - -[[create-connector-api-example]] -==== {api-examples-title} - -[source,console] ----- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "description": "My Connector to sync data to Elastic index from Google Drive", - "service_type": "google_drive", - "language": "en" -} ----- - - -The API returns the following result: - -[source,console-result] ----- -{ - "result": "created", - "id": "my-connector" -} ----- -//// -[source,console] ----- -DELETE _connector/my-connector ----- -// TEST[continued] -//// diff --git a/docs/reference/connector/apis/create-connector-sync-job-api.asciidoc b/docs/reference/connector/apis/create-connector-sync-job-api.asciidoc deleted file mode 100644 index 1a66d47578a8b..0000000000000 --- a/docs/reference/connector/apis/create-connector-sync-job-api.asciidoc +++ /dev/null @@ -1,80 +0,0 @@ -[[create-connector-sync-job-api]] -=== Create connector sync job API -++++ -Create connector sync job -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Creates a connector sync job. - -To get started with Connector APIs, check out <>. 
- - -[source, console] --------------------------------------------------- -POST _connector/_sync_job -{ - "id": "connector-id", - "job_type": "full", - "trigger_method": "on_demand" -} --------------------------------------------------- -// TEST[skip:there's no way to clean up after this code snippet, as we don't know the id ahead of time] - - -[[create-connector-sync-job-api-request]] -==== {api-request-title} -`POST _connector/_sync_job` - - -[[create-connector-sync-job-api-prereqs]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `id` parameter should reference an existing connector. - -[[create-connector-sync-job-api-desc]] -==== {api-description-title} - -Creates a connector sync job document in the internal index and initializes its counters and timestamps with default values. -Certain values can be updated via the API. - -[role="child_attributes"] -[[create-connector-sync-job-api-request-body]] -==== {api-request-body-title} - -`id`:: -(Required, string) The id of the connector to create the sync job for. - -`job_type`:: -(Optional, string) The job type of the created sync job. Defaults to `full`. - -`trigger_method`:: -(Optional, string) The trigger method of the created sync job. Defaults to `on_demand`. - - -[role="child_attributes"] -[[create-connector-sync-job-api-response-body]] -==== {api-response-body-title} - -`id`:: -(string) The ID associated with the connector sync job document. - -[[create-connector-sync-job-api-response-codes]] -==== {api-response-codes-title} - -`201`:: -Indicates that the connector sync job was created successfully. - -`400`:: -Indicates that the request was malformed. - -`404`:: -Indicates that either the index or the referenced connector is missing. diff --git a/docs/reference/connector/apis/delete-connector-api.asciidoc b/docs/reference/connector/apis/delete-connector-api.asciidoc deleted file mode 100644 index a324630cc8a52..0000000000000 --- a/docs/reference/connector/apis/delete-connector-api.asciidoc +++ /dev/null @@ -1,90 +0,0 @@ -[[delete-connector-api]] -=== Delete connector API -++++ -Delete connector -++++ - -beta::[] - - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Deletes a connector and optionally removes associated sync jobs. - -Note: this action doesn't delete any API key, ingest pipeline or data index associated with the connector. These need to be removed manually. - -To get started with Connector APIs, check out <>. - - -[[delete-connector-api-request]] -==== {api-request-title} - -`DELETE _connector/` - -[[delete-connector-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_id` parameter should reference an existing connector. - -[[delete-connector-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -``:: -(Optional, boolean) If `true`, the connector doc is deleted. If `false`, connector doc is marked as deleted (soft deletion). Defaults to `false`. - -`delete_sync_jobs`:: -(Optional, boolean) A flag indicating if associated sync jobs should be also removed. Defaults to `false`. 
- -[[delete-connector-api-response-codes]] -==== {api-response-codes-title} - -`400`:: -The `connector_id` was not provided. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[delete-connector-api-example]] -==== {api-examples-title} - -The following example deletes the connector with ID `my-connector`: - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-connector -{ - "name": "My Connector", - "service_type": "google_drive" -} - -PUT _connector/another-connector -{ - "name": "My Connector", - "service_type": "google_drive" -} --------------------------------------------------- -// TESTSETUP -//// - -[source,console] ----- -DELETE _connector/another-connector?delete_sync_jobs=true ----- - -[source,console-result] ----- -{ - "acknowledged": true -} ----- - -The following example deletes the connector with ID `another-connector` and its associated sync jobs. diff --git a/docs/reference/connector/apis/delete-connector-sync-job-api.asciidoc b/docs/reference/connector/apis/delete-connector-sync-job-api.asciidoc deleted file mode 100644 index bc906d12cae40..0000000000000 --- a/docs/reference/connector/apis/delete-connector-sync-job-api.asciidoc +++ /dev/null @@ -1,62 +0,0 @@ -[[delete-connector-sync-job-api]] -=== Delete connector sync job API -++++ -Delete connector sync job -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Removes a connector sync job and its associated data. -This is a destructive action that is not recoverable. - -To get started with Connector APIs, check out <>. - - -[[delete-connector-sync-job-api-request]] -==== {api-request-title} - -`DELETE _connector/_sync_job/` - -[[delete-connector-sync-job-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. - -[[delete-connector-sync-job-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[[delete-connector-sync-job-api-response-codes]] -==== {api-response-codes-title} - -`400`:: -The `connector_sync_job_id` was not provided. - -`404`:: -No connector sync job matching `connector_sync_job_id` could be found. - -[[delete-connector-sync-job-api-example]] -==== {api-examples-title} - -The following example deletes the connector sync job with ID `my-connector-sync-job-id`: - -[source,console] ----- -DELETE _connector/_sync_job/my-connector-sync-job-id ----- -// TEST[skip:there's no way to clean up after this code snippet, as we don't know the ids of sync jobs ahead of time] - -[source,console-result] ----- -{ - "acknowledged": true -} ----- diff --git a/docs/reference/connector/apis/get-connector-api.asciidoc b/docs/reference/connector/apis/get-connector-api.asciidoc deleted file mode 100644 index c8cbae668c261..0000000000000 --- a/docs/reference/connector/apis/get-connector-api.asciidoc +++ /dev/null @@ -1,76 +0,0 @@ -[[get-connector-api]] -=== Get connector API -++++ -Get connector -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Retrieves the details about a connector. - -To get started with Connector APIs, check out <>. 
- - -[[get-connector-api-request]] -==== {api-request-title} - -`GET _connector/` - -[[get-connector-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. - -[[get-connector-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -`include_deleted`:: -(Optional, boolean) A flag indicating whether to also return connectors that have been soft-deleted. Defaults to `false`. - -[[get-connector-api-response-codes]] -==== {api-response-codes-title} - -`400`:: -The `connector_id` was not provided. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[get-connector-api-example]] -==== {api-examples-title} - -The following example gets the connector `my-connector`: - -//// -[source,console] --------------------------------------------------- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "Google Drive Connector", - "service_type": "google_drive" -} - --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -GET _connector/my-connector ----- diff --git a/docs/reference/connector/apis/get-connector-sync-job-api.asciidoc b/docs/reference/connector/apis/get-connector-sync-job-api.asciidoc deleted file mode 100644 index f4ccc59e0315e..0000000000000 --- a/docs/reference/connector/apis/get-connector-sync-job-api.asciidoc +++ /dev/null @@ -1,54 +0,0 @@ -[[get-connector-sync-job-api]] -=== Get connector sync job API -++++ -Get connector sync job -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Retrieves the details about a connector sync job. - -To get started with Connector APIs, check out <>. - - -[[get-connector-sync-job-api-request]] -==== {api-request-title} - -`GET _connector/_sync_job/` - -[[get-connector-sync-job-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. - -[[get-connector-sync-job-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[[get-connector-sync-job-api-response-codes]] -==== {api-response-codes-title} - -`400`:: -The `connector_sync_job_id` was not provided. - -`404` (Missing resources):: -No connector sync job matching `connector_sync_job_id` could be found. - -[[get-connector-sync-job-api-example]] -==== {api-examples-title} - -The following example gets the connector sync job `my-connector-sync-job`: - -[source,console] ----- -GET _connector/_sync_job/my-connector-sync-job ----- -// TEST[skip:there's no way to clean up after creating a connector sync job, as we don't know the id ahead of time. Therefore, skip this test.] 
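When the sync job ID is not known up front, it can be discovered with the list connector sync jobs API documented below, filtered by connector. A minimal sketch (the connector ID `my-connector` is illustrative):

[source,console]
----
GET _connector/_sync_job?connector_id=my-connector&size=1
----
// TEST[skip:illustrative sketch only]

The `id` of the returned sync job document can then be passed to `GET _connector/_sync_job/<connector_sync_job_id>` as shown above.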
diff --git a/docs/reference/connector/apis/list-connector-sync-jobs-api.asciidoc b/docs/reference/connector/apis/list-connector-sync-jobs-api.asciidoc deleted file mode 100644 index b5f52e31ac296..0000000000000 --- a/docs/reference/connector/apis/list-connector-sync-jobs-api.asciidoc +++ /dev/null @@ -1,97 +0,0 @@ -[role="xpack"] -[[list-connector-sync-jobs-api]] -=== List connector sync jobs API -++++ -List connector sync jobs -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Returns information about all stored connector sync jobs ordered by their creation date in ascending order. - -To get started with Connector APIs, check out <>. - - -[[list-connector-sync-jobs-api-request]] -==== {api-request-title} - -`GET _connector/_sync_job` - -[[list-connector-sync-jobs-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. - -[[list-connector-sync-jobs-api-path-params]] -==== {api-path-parms-title} - -`size`:: -(Optional, integer) Maximum number of results to retrieve. Defaults to `100`. - -`from`:: -(Optional, integer) The offset from the first result to fetch. Defaults to `0`. - -`status`:: -(Optional, job status) A job status to filter the results for. Available statuses include: `canceling`, `canceled`, `completed`, `error`, `in_progress`, `pending`, `suspended`. - -`connector_id`:: -(Optional, string) The connector id the fetched sync jobs need to have. - -`job_type`:: -(Optional, job type) A comma-separated list of job types. Available job types are: `full`, `incremental` and `access_control`. - -[[list-connector-sync-jobs-api-example]] -==== {api-examples-title} - -The following example lists all connector sync jobs: - - -[source,console] ----- -GET _connector/_sync_job ----- -// TEST[skip:there's no way to clean up after this code snippet, as we don't know the ids of sync jobs ahead of time] - -The following example lists the first two connector sync jobs: - -[source,console] ----- -GET _connector/_sync_job?from=0&size=2 ----- -// TEST[skip:there's no way to clean up after this code snippet, as we don't know the ids of sync jobs ahead of time] - -The following example lists pending connector sync jobs (the first 100 per default): -[source,console] ----- -GET _connector/_sync_job?status=pending ----- -// TEST[skip:there's no way to clean up after this code snippet, as we don't know the ids of sync jobs ahead of time] - -The following example lists connector sync jobs (the first 100 per default) for the connector with id `connector-1`: -[source,console] ----- -GET _connector/_sync_job?connector_id=connector-1 ----- -// TEST[skip:there's no way to clean up after this code snippet, as we don't know the ids of sync jobs ahead of time] - -The following example lists connector sync jobs (the first 100 per default) for the connector with job type `full` or `incremental`: -[source,console] ----- -GET _connector/_sync_job?job_type=full,incremental ----- -// TEST[skip:there's no way to clean up after this code snippet, as we don't know the ids of sync jobs ahead of time] - -[[list-connector-sync-jobs-api-response-codes]] -==== {api-response-codes-title} - -`200`: -Indicates that results were successfully returned (results can also be empty). - -`400`: -Indicates that the request was malformed. 
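The query parameters documented above can also be combined in a single request. For example, to list completed sync jobs for the connector with ID `connector-1` (an illustrative sketch; omitted parameters keep their defaults):

[source,console]
----
GET _connector/_sync_job?connector_id=connector-1&status=completed&size=10
----
// TEST[skip:illustrative sketch only]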
diff --git a/docs/reference/connector/apis/list-connectors-api.asciidoc b/docs/reference/connector/apis/list-connectors-api.asciidoc deleted file mode 100644 index d334e5d92c232..0000000000000 --- a/docs/reference/connector/apis/list-connectors-api.asciidoc +++ /dev/null @@ -1,119 +0,0 @@ -[role="xpack"] -[[list-connector-api]] -=== List connectors API -++++ -List connectors -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Returns information about all created connectors. - -To get started with Connector APIs, check out <>. - - - -[[list-connector-api-request]] -==== {api-request-title} - -`GET _connector` - -[[list-connector-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. - -[[list-connector-api-path-params]] -==== {api-path-parms-title} - -`size`:: -(Optional, integer) Maximum number of results to retrieve. Defaults to `100`. - -`from`:: -(Optional, integer) The offset from the first result to fetch. Defaults to `0`. - -`index_name`:: -(Optional, string) A comma-separated list of index names associated with connectors, used to filter search results. - -`connector_name`:: -(Optional, string) A comma-separated list of connector names, used to filter search results. - -`service_type`:: -(Optional, string) A comma-separated list of connector service types, used to filter search results. - -`include_deleted`:: -(Optional, boolean) A flag indicating whether to also return connectors that have been soft-deleted. Defaults to `false`. - -[[list-connector-api-example]] -==== {api-examples-title} - -The following example lists all connectors: - -//// -[source,console] --------------------------------------------------- -PUT _connector/connector-1 -{ - "index_name": "search-google-drive", - "name": "Google Drive Connector", - "service_type": "google_drive" -} - -PUT _connector/connector-2 -{ - "index_name": "search-sharepoint-online", - "name": "Sharepoint Online Connector", - "service_type": "sharepoint_online" -} - --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/connector-1 - -DELETE _connector/connector-2 --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -GET _connector ----- - -The following example lists the first two connectors: - -[source,console] ----- -GET _connector?from=0&size=2 ----- - -An example to list a connector associated with the `search-google-drive` Elasticsearch index: - -[source,console] ----- -GET _connector?index_name=search-google-drive ----- - - -An example to list all connectors with `sharepoint_online` service type: - -[source,console] ----- -GET _connector?service_type=sharepoint_online ----- - -An example to list all connectors with `sharepoint_online` or `google_drive` service type: - -[source,console] ----- -GET _connector?service_type=sharepoint_online,google_drive ----- diff --git a/docs/reference/connector/apis/set-connector-sync-job-error-api.asciidoc b/docs/reference/connector/apis/set-connector-sync-job-error-api.asciidoc deleted file mode 100644 index b5a0cd667f3ee..0000000000000 --- a/docs/reference/connector/apis/set-connector-sync-job-error-api.asciidoc +++ /dev/null @@ -1,69 +0,0 @@ -[[set-connector-sync-job-error-api]] 
-=== Set connector sync job error API -++++ -Set connector sync job error -++++ - -preview::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Sets a connector sync job error. - -To get started with Connector APIs, check out <>. - - -[[set-connector-sync-job-error-api-request]] -==== {api-request-title} -`PUT _connector/_sync_job//_error` - -[[set-connector-sync-job-error-api-prereqs]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_sync_job_id` parameter should reference an existing connector sync job. - -[[set-connector-sync-job-error-api-desc]] -==== {api-description-title} - -Sets the `error` field for the specified connector sync job and sets its `status` to `error`. - -[[set-connector-sync-job-error-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[set-connector-sync-job-error-api-request-body]] -==== {api-request-body-title} - -`error`:: -(Required, string) The error to set the connector sync job `error` field to. - -[[set-connector-sync-job-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Indicates that the connector sync job error was set successfully. - -`404`:: -No connector sync job matching `connector_sync_job_id` could be found. - -[[set-connector-sync-job-error-api-example]] -==== {api-examples-title} - -The following example sets the error `some-error` in the connector sync job `my-connector-sync-job`: - -[source,console] ----- -PUT _connector/_sync_job/my-connector-sync-job/_error -{ - "error": "some-error" -} ----- -// TEST[skip:there's no way to clean up after creating a connector sync job, as we don't know the id ahead of time. Therefore, skip this test.] diff --git a/docs/reference/connector/apis/set-connector-sync-job-stats-api.asciidoc b/docs/reference/connector/apis/set-connector-sync-job-stats-api.asciidoc deleted file mode 100644 index 5691280a30dd7..0000000000000 --- a/docs/reference/connector/apis/set-connector-sync-job-stats-api.asciidoc +++ /dev/null @@ -1,91 +0,0 @@ -[[set-connector-sync-job-stats-api]] -=== Set connector sync job stats API -++++ -Set connector sync job stats -++++ - -preview::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Sets connector sync job stats. - -To get started with Connector APIs, check out <>. - - -[[set-connector-sync-job-stats-api-request]] -==== {api-request-title} -`PUT _connector/_sync_job//_stats` - -[[set-connector-sync-job-stats-api-prereqs]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_sync_job_id` parameter should reference an existing connector sync job. - -[[set-connector-sync-job-stats-api-desc]] -==== {api-description-title} - -Sets the stats for a connector sync job. -Stats include: `deleted_document_count`, `indexed_document_count`, `indexed_document_volume` and `total_document_count`. -`last_seen` can also be updated using this API. -This API is mainly used by the connector service for updating sync job information. 
- -[[set-connector-sync-job-stats-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[set-connector-sync-job-stats-api-request-body]] -==== {api-request-body-title} - -`deleted_document_count`:: -(Required, int) The number of documents the sync job deleted. - -`indexed_document_count`:: -(Required, int) The number of documents the sync job indexed. - -`indexed_document_volume`:: -(Required, int) The total size of the data (in MiB) the sync job indexed. - -`total_document_count`:: -(Optional, int) The total number of documents in the target index after the sync job finished. - -`last_seen`:: -(Optional, instant) The timestamp to set the connector sync job's `last_seen` property. - -`metadata`:: -(Optional, object) The connector-specific metadata. - -[[set-connector-sync-job-stats-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Indicates that the connector sync job stats were successfully updated. - -`404`:: -No connector sync job matching `connector_sync_job_id` could be found. - -[[set-connector-sync-job-stats-api-example]] -==== {api-examples-title} - -The following example sets all mandatory and optional stats for the connector sync job `my-connector-sync-job`: - -[source,console] ----- -PUT _connector/_sync_job/my-connector-sync-job/_stats -{ - "deleted_document_count": 10, - "indexed_document_count": 20, - "indexed_document_volume": 1000, - "total_document_count": 2000, - "last_seen": "2023-01-02T10:00:00Z" -} ----- -// TEST[skip:there's no way to clean up after creating a connector sync job, as we don't know the id ahead of time. Therefore, skip this test.] diff --git a/docs/reference/connector/apis/update-connector-api-key-id-api.asciidoc b/docs/reference/connector/apis/update-connector-api-key-id-api.asciidoc deleted file mode 100644 index 8df49c43c128e..0000000000000 --- a/docs/reference/connector/apis/update-connector-api-key-id-api.asciidoc +++ /dev/null @@ -1,106 +0,0 @@ -[[update-connector-api-key-id-api]] -=== Update connector API key ID API -++++ -Update connector API key id -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Updates the `api_key_id` and/or `api_key_secret_id` field(s) of a connector, specifying: - -. The ID of the API key used for authorization -. The ID of the Connector Secret where the API key is stored - -The Connector Secret ID is only required for Elastic managed connectors. -Self-managed connectors do not use this field. -See the documentation for <> for more details. - -To get started with Connector APIs, check out <>. - - -[[update-connector-api-key-id-api-request]] -==== {api-request-title} - -`PUT _connector//_api_key_id` - -[[update-connector-api-key-id-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_id` parameter should reference an existing connector. -* The `api_key_id` parameter should reference an existing API key. -* The `api_key_secret_id` parameter should reference an existing Connector Secret containing an encoded API key value. 
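For context, the API key referenced by `api_key_id` is a regular {es} API key created with the security API. The sketch below is illustrative only; the key name, index name, and privileges are assumptions, not requirements of the connector APIs:

[source,console]
----
POST /_security/api_key
{
  "name": "my-connector-api-key",
  "role_descriptors": {
    "my-connector-role": {
      "cluster": ["monitor"],
      "indices": [
        {
          "names": ["search-google-drive"],
          "privileges": ["all"]
        }
      ]
    }
  }
}
----
// TEST[skip:illustrative sketch only]

The `id` field of the create API key response is the value to supply as `api_key_id` in the request body below.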
- -[[update-connector-api-key-id-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[update-connector-api-key-id-api-request-body]] -==== {api-request-body-title} - -`api_key_id`:: -(Optional, string) ID of the API key that the connector will use to authorize access to required indices. Each connector can be associated with at most one API key. - -`api_key_secret_id`:: -(Optional, string) ID of the Connector Secret that contains the encoded API key. This should be the same API key as `api_key_id` references. This is only required for Elastic managed connectors. - -[[update-connector-api-key-id-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector `api_key_id` and/or `api_key_secret_id` field(s) successfully updated. - -`400`:: -The `connector_id` was not provided or the request payload was malformed. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[update-connector-api-key-id-api-example]] -==== {api-examples-title} - -The following example updates the `api_key_id` and `api_key_secret_id` field(s) for the connector with ID `my-connector`: - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "service_type": "google_drive" -} --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -PUT _connector/my-connector/_api_key_id -{ - "api_key_id": "my-api-key-id", - "api_key_secret_id": "my-connector-secret-id" -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- diff --git a/docs/reference/connector/apis/update-connector-configuration-api.asciidoc b/docs/reference/connector/apis/update-connector-configuration-api.asciidoc deleted file mode 100644 index d02c332d7f34b..0000000000000 --- a/docs/reference/connector/apis/update-connector-configuration-api.asciidoc +++ /dev/null @@ -1,356 +0,0 @@ -[[update-connector-configuration-api]] -=== Update connector configuration API -++++ -Update connector configuration -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Updates a connector's `configuration`, allowing for config value updates within a registered configuration schema. - -To get started with Connector APIs, check out <>. - - -[[update-connector-configuration-api-request]] -==== {api-request-title} - -`PUT _connector//_configuration` - -[[update-connector-configuration-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_id` parameter should reference an existing connector. -* To update configuration `values`, the connector `configuration` schema must be first registered by a running instance of Elastic connector service. -* Make sure configuration fields are compatible with the configuration schema for the third-party data source. Refer to the individual <> for details. 
- -[[update-connector-configuration-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[update-connector-configuration-api-request-body]] -==== {api-request-body-title} - -`values`:: -(Optional, object) Configuration values for the connector, represented as a mapping of configuration fields to their respective values within a registered schema. - -`configuration`:: -(Optional, object) The configuration schema definition for the connector. The configuration field is a map where each key represents a specific configuration field name, and the value is a `ConnectorConfiguration` object. For connector management use `values` to pass config values. The `configuration` object is used by the Elastic connector service to register the connector configuration schema. - - -[[update-connector-configuration-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector configuration was successfully updated. - -`400`:: -The `connector_id` was not provided or the request payload was malformed. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[update-connector-configuration-api-example]] -==== {api-examples-title} - -The following example configures a `sharepoint_online` connector. Find the supported configuration options in the <>, or by inspecting the schema in the connector's `configuration` field using the <>. - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-spo-connector -{ - "index_name": "search-sharepoint-online", - "name": "Sharepoint Online Connector", - "service_type": "sharepoint_online" -} - -PUT _connector/my-spo-connector/_configuration -{ - "configuration": { - "tenant_id": { - "default_value": null, - "depends_on": [], - "display": "textbox", - "label": "Tenant ID", - "options": [], - "order": 1, - "required": true, - "sensitive": false, - "tooltip": "", - "type": "str", - "ui_restrictions": [], - "validations": [], - "value": "" - }, - "tenant_name": { - "default_value": null, - "depends_on": [], - "display": "textbox", - "label": "Tenant name", - "options": [], - "order": 2, - "required": true, - "sensitive": false, - "tooltip": "", - "type": "str", - "ui_restrictions": [], - "validations": [], - "value": "" - }, - "client_id": { - "default_value": null, - "depends_on": [], - "display": "textbox", - "label": "Client ID", - "options": [], - "order": 3, - "required": true, - "sensitive": false, - "tooltip": "", - "type": "str", - "ui_restrictions": [], - "validations": [], - "value": "" - }, - "secret_value": { - "default_value": null, - "depends_on": [], - "display": "textbox", - "label": "Secret value", - "options": [], - "order": 4, - "required": true, - "sensitive": true, - "tooltip": "", - "type": "str", - "ui_restrictions": [], - "validations": [], - "value": "" - }, - "site_collections": { - "default_value": null, - "depends_on": [], - "display": "textarea", - "label": "Comma-separated list of sites", - "options": [], - "order": 5, - "required": true, - "sensitive": false, - "tooltip": "A comma-separated list of sites to ingest data from. 
Use * to include all available sites.", - "type": "list", - "ui_restrictions": [], - "validations": [], - "value": "" - }, - "use_text_extraction_service": { - "default_value": false, - "depends_on": [], - "display": "toggle", - "label": "Use text extraction service", - "options": [], - "order": 6, - "required": true, - "sensitive": false, - "tooltip": "Requires a separate deployment of the Elastic Data Extraction Service. Also requires that pipeline settings disable text extraction.", - "type": "bool", - "ui_restrictions": [ - "advanced" - ], - "validations": [], - "value": false - }, - "use_document_level_security": { - "default_value": false, - "depends_on": [], - "display": "toggle", - "label": "Enable document level security", - "options": [], - "order": 7, - "required": true, - "sensitive": false, - "tooltip": "Document level security ensures identities and permissions set in Sharepoint Online are maintained in Elasticsearch. This metadata is added to your Elasticsearch documents, so you can control user and group read-access. Access control syncs ensure this metadata is kept up to date.", - "type": "bool", - "ui_restrictions": [], - "validations": [], - "value": false - }, - "fetch_drive_item_permissions": { - "default_value": true, - "depends_on": [ - { - "field": "use_document_level_security", - "value": true - } - ], - "display": "toggle", - "label": "Fetch drive item permissions", - "options": [], - "order": 8, - "required": true, - "sensitive": false, - "tooltip": "Enable this option to fetch drive item specific permissions. This setting can increase sync time.", - "type": "bool", - "ui_restrictions": [], - "validations": [], - "value": true - }, - "fetch_unique_page_permissions": { - "default_value": true, - "depends_on": [ - { - "field": "use_document_level_security", - "value": true - } - ], - "display": "toggle", - "label": "Fetch unique page permissions", - "options": [], - "order": 9, - "required": true, - "sensitive": false, - "tooltip": "Enable this option to fetch unique page permissions. This setting can increase sync time. If this setting is disabled a page will inherit permissions from its parent site.", - "type": "bool", - "ui_restrictions": [], - "validations": [], - "value": true - }, - "fetch_unique_list_permissions": { - "default_value": true, - "depends_on": [ - { - "field": "use_document_level_security", - "value": true - } - ], - "display": "toggle", - "label": "Fetch unique list permissions", - "options": [], - "order": 10, - "required": true, - "sensitive": false, - "tooltip": "Enable this option to fetch unique list permissions. This setting can increase sync time. If this setting is disabled a list will inherit permissions from its parent site.", - "type": "bool", - "ui_restrictions": [], - "validations": [], - "value": true - }, - "fetch_unique_list_item_permissions": { - "default_value": true, - "depends_on": [ - { - "field": "use_document_level_security", - "value": true - } - ], - "display": "toggle", - "label": "Fetch unique list item permissions", - "options": [], - "order": 11, - "required": true, - "sensitive": false, - "tooltip": "Enable this option to fetch unique list item permissions. This setting can increase sync time. 
If this setting is disabled a list item will inherit permissions from its parent site.", - "type": "bool", - "ui_restrictions": [], - "validations": [], - "value": true - }, - "enumerate_all_sites": { - "default_value": true, - "depends_on": [], - "display": "toggle", - "label": "Enumerate all sites?", - "options": [], - "order": 6, - "required": false, - "sensitive": false, - "tooltip": "If enabled, sites will be fetched in bulk, then filtered down to the configured list of sites. This is efficient when syncing many sites. If disabled, each configured site will be fetched with an individual request. This is efficient when syncing fewer sites.", - "type": "bool", - "ui_restrictions": [], - "validations": [], - "value": true - }, - "fetch_subsites": { - "default_value": false, - "depends_on": [ - { - "field": "enumerate_all_sites", - "value": false - } - ], - "display": "toggle", - "label": "Fetch sub-sites of configured sites?", - "options": [], - "order": 7, - "required": false, - "sensitive": false, - "tooltip": "Whether subsites of the configured site(s) should be automatically fetched.", - "type": "bool", - "ui_restrictions": [], - "validations": [], - "value": true - } - } -} --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-spo-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -PUT _connector/my-spo-connector/_configuration -{ - "values": { - "tenant_id": "my-tenant-id", - "tenant_name": "my-sharepoint-site", - "client_id": "foo", - "secret_value": "bar", - "site_collections": "*" - } -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- - - -When you're first setting up your connector you'll need to provide all required configuration details to start running syncs. -But you can also use this API to only update a subset of fields. -Here's an example that only updates the `secret_value` field for a `sharepoint_online` connector. -The other configuration values won't change. - -[source,console] ----- -PUT _connector/my-spo-connector/_configuration -{ - "values": { - "secret_value": "foo-bar" - } -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- diff --git a/docs/reference/connector/apis/update-connector-error-api.asciidoc b/docs/reference/connector/apis/update-connector-error-api.asciidoc deleted file mode 100644 index 859a48c31c0ca..0000000000000 --- a/docs/reference/connector/apis/update-connector-error-api.asciidoc +++ /dev/null @@ -1,99 +0,0 @@ -[[update-connector-error-api]] -=== Update connector error API -++++ -Update connector error -++++ - -preview::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Updates the `error` field of a connector. - -To get started with Connector APIs, check out <>. - - -[[update-connector-error-api-request]] -==== {api-request-title} - -`PUT _connector//_error` - -[[update-connector-error-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_id` parameter should reference an existing connector. - -[[update-connector-error-api-desc]] -==== {api-description-title} - -Sets the `error` field for the specified connector. 
If the `error` provided in the request body is non-null, the connector's status is updated to `error`. Otherwise, if the `error` is reset to null, the connector status is updated to `connected`. - -[[update-connector-error-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[update-connector-error-api-request-body]] -==== {api-request-body-title} - -`error`:: -(Required, string) A messaged related to the last error encountered by the connector. - - -[[update-connector-error-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector `error` field was successfully updated. - -`400`:: -The `connector_id` was not provided or the request payload was malformed. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[update-connector-error-api-example]] -==== {api-examples-title} - -The following example updates the `error` field for the connector with ID `my-connector`: - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "service_type": "google_drive" -} --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -PUT _connector/my-connector/_error -{ - "error": "Houston, we have a problem!" -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- diff --git a/docs/reference/connector/apis/update-connector-features-api.asciidoc b/docs/reference/connector/apis/update-connector-features-api.asciidoc deleted file mode 100644 index 74c512e42cd1b..0000000000000 --- a/docs/reference/connector/apis/update-connector-features-api.asciidoc +++ /dev/null @@ -1,145 +0,0 @@ -[[update-connector-features-api]] -=== Update connector features API -++++ -Update connector features -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Manages the `features` of a connector. This endpoint can be used to control the following aspects of a connector: - -* document-level security -* incremental syncs -* advanced sync rules -* basic sync rules - -Normally, the running connector service automatically manages these features. However, you can use this API to override the default behavior. - -To get started with Connector APIs, check out <>. - - -[[update-connector-features-api-request]] -==== {api-request-title} - -`PUT _connector//_features` - -[[update-connector-features-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_id` parameter should reference an existing connector. - -[[update-connector-features-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[update-connector-features-api-request-body]] -==== {api-request-body-title} - -`features`:: -(Required, object) An object containing connector features. - -* `document_level_security` (Optional, object) Controls whether document-level security is enabled with the `enabled` flag. 
-* `incremental_sync` (Optional, object) Controls whether incremental syncs are enabled with the `enabled` flag. -* `native_connector_api_keys`(Optional, object) Controls whether managed connector API keys are enabled with the `enabled` flag. -* `sync_rules` (Optional, object) Controls sync rules. -** `advanced` (Optional, object) Controls whether advanced sync rules are enabled with the `enabled` flag. -** `basic`(Optional, object) Controls whether basic sync rules are enabled with the `enabled` flag. - - - -[[update-connector-features-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector `features` was successfully updated. - -`400`:: -The `connector_id` was not provided or the request payload was malformed. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[update-connector-features-api-example]] -==== {api-examples-title} - -The following example updates the `features` field for the connector with ID `my-connector`: - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "service_type": "google_drive" -} --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -PUT _connector/my-connector/_features -{ - "features": { - "document_level_security": { - "enabled": true - }, - "incremental_sync": { - "enabled": true - }, - "sync_rules": { - "advanced": { - "enabled": false - }, - "basic": { - "enabled": true - } - } - } -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- - -The endpoint supports partial updates of the `features` field. For example, to update only the `document_level_security` feature, you can send the following request: - -[source,console] ----- -PUT _connector/my-connector/_features -{ - "features": { - "document_level_security": { - "enabled": true - } - } -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- diff --git a/docs/reference/connector/apis/update-connector-filtering-api.asciidoc b/docs/reference/connector/apis/update-connector-filtering-api.asciidoc deleted file mode 100644 index 7ba0080cde28f..0000000000000 --- a/docs/reference/connector/apis/update-connector-filtering-api.asciidoc +++ /dev/null @@ -1,196 +0,0 @@ -[[update-connector-filtering-api]] -=== Update connector filtering API -++++ -Update connector filtering -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Updates the draft `filtering` configuration of a connector and marks the draft validation state as `edited`. The filtering draft is activated once validated by the running Elastic connector service. - -The filtering property is used to configure sync rules (both basic and advanced) for a connector. Learn more in the <>. - -To get started with Connector APIs, check out <>. - - -[[update-connector-filtering-api-request]] -==== {api-request-title} - -`PUT _connector//_filtering` - -[[update-connector-filtering-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. 
-* The `connector_id` parameter should reference an existing connector. -* The filtering draft is activated once it is validated by the running Elastic connector service; for activation, the `draft.validation.state` must be `valid`. -* If, after a validation attempt, the `draft.validation.state` is `invalid`, inspect `draft.validation.errors` and fix any issues. - -[[update-connector-filtering-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[update-connector-filtering-api-request-body]] -==== {api-request-body-title} - -`rules`:: -(Optional, array of objects) -An array of <>, each with the following sub-attributes: -* `id` (Required, string) + -A unique identifier for the rule. -* `policy` (Required, string) + -Specifies the policy, such as `include` or `exclude`. -* `field` (Required, string) + -The field in the document to which this rule applies. -* `rule` (Required, string) + -The type of rule, such as `regex`, `starts_with`, `ends_with`, `contains`, `equals`, `<`, `>`, etc. -* `value` (Required, string) + -The value to be used in conjunction with the rule for matching the contents of the document's field. -* `order` (Required, number) + -The order in which the rules are applied. The first rule to match has its policy applied. -* `created_at` (Optional, datetime) + -The timestamp when the rule was added. Defaults to `now` UTC timestamp. -* `updated_at` (Optional, datetime) + -The timestamp when the rule was last edited. Defaults to `now` UTC timestamp. - -`advanced_snippet`:: -(Optional, object) -Used for <> at query time, with the following sub-attributes: -* `value` (Required, object or array) + -A JSON object/array passed directly to the connector for advanced filtering. -* `created_at` (Optional, datetime) + -The timestamp when this JSON object was created. Defaults to `now` UTC timestamp. -* `updated_at` (Optional, datetime) + -The timestamp when this JSON object was last edited. Defaults to `now` UTC timestamp. - - -[[update-connector-filtering-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector draft filtering was successfully updated. - -`400`:: -The `connector_id` was not provided or the request payload was malformed. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[update-connector-filtering-api-example]] -==== {api-examples-title} - -The following example updates the draft <> for a Google Drive connector with ID `my-g-drive-connector`.
All Google Drive files with `.txt` extension will be skipped: - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-g-drive-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "service_type": "google_drive" -} - -PUT _connector/my-sql-connector -{ - "index_name": "search-sql", - "name": "My SQL Connector", - "service_type": "google_drive" -} - --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-g-drive-connector -DELETE _connector/my-sql-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -PUT _connector/my-g-drive-connector/_filtering -{ - "rules": [ - { - "field": "file_extension", - "id": "exclude-txt-files", - "order": 0, - "policy": "exclude", - "rule": "equals", - "value": "txt" - }, - { - "field": "_", - "id": "DEFAULT", - "order": 1, - "policy": "include", - "rule": "regex", - "value": ".*" - } - ] -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- - -The following example updates the draft advanced sync rules for a MySQL connector with id `my-sql-connector`. Advanced sync rules are specific to each connector type. Refer to the references for connectors that support <> for syntax and examples. - -[source,console] ----- -PUT _connector/my-sql-connector/_filtering -{ - "advanced_snippet": { - "value": [{ - "tables": [ - "users", - "orders" - ], - "query": "SELECT users.id AS id, orders.order_id AS order_id FROM users JOIN orders ON users.id = orders.user_id" - }] - } -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- - - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-sql-connector/_filtering/_validation -{ - "validation": { - "state": "valid", - "errors": [] - } -} --------------------------------------------------- -// TEST[continued] -//// - - -Note, you can also update draft `rules` and `advanced_snippet` in a single request. - -Once the draft is updated, its validation state is set to `edited`. The connector service will then validate the rules and report the validation state as either `invalid` or `valid`. If the state is `valid`, the draft filtering will be activated by the running Elastic connector service. diff --git a/docs/reference/connector/apis/update-connector-index-name-api.asciidoc b/docs/reference/connector/apis/update-connector-index-name-api.asciidoc deleted file mode 100644 index fbbef6e66ca40..0000000000000 --- a/docs/reference/connector/apis/update-connector-index-name-api.asciidoc +++ /dev/null @@ -1,94 +0,0 @@ -[[update-connector-index-name-api]] -=== Update connector index name API -++++ -Update connector index name -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Updates the `index_name` field of a connector, specifying the index where the data ingested by the connector is stored. - -To get started with Connector APIs, check out <>. - - -[[update-connector-index-name-api-request]] -==== {api-request-title} - -`PUT _connector//_index_name` - -[[update-connector-index-name-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. 
-* The `connector_id` parameter should reference an existing connector. - -[[update-connector-index-name-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[update-connector-index-name-api-request-body]] -==== {api-request-body-title} - -`index_name`:: -(Required, string) Index name where the connector ingests data. Each index name can be associated with at most one connector. - - -[[update-connector-index-name-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector `index_name` field was successfully updated. - -`400`:: -The `connector_id` was not provided or the request payload was malformed. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[update-connector-index-name-api-example]] -==== {api-examples-title} - -The following example updates the `index_name` field for the connector with ID `my-connector`: - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "service_type": "google_drive" -} --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -PUT _connector/my-connector/_index_name -{ - "index_name": "data-from-my-google-drive" -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- diff --git a/docs/reference/connector/apis/update-connector-last-sync-api.asciidoc b/docs/reference/connector/apis/update-connector-last-sync-api.asciidoc deleted file mode 100644 index d827d25c12b4e..0000000000000 --- a/docs/reference/connector/apis/update-connector-last-sync-api.asciidoc +++ /dev/null @@ -1,143 +0,0 @@ -[[update-connector-last-sync-api]] -=== Update connector last sync stats API -++++ -Update connector last sync stats -++++ - -preview::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Updates the fields related to the last sync of a connector. - -This action is used for analytics and monitoring. - -To get started with Connector APIs, check out <>. - - -[[update-connector-last-sync-api-request]] -==== {api-request-title} - -`PUT _connector//_last_sync` - -[[update-connector-last-sync-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_id` parameter should reference an existing connector. - -[[update-connector-last-sync-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[update-connector-last-sync-api-request-body]] -==== {api-request-body-title} - -`last_access_control_sync_error`:: -(Optional, string) The last error message related to access control sync, if any. - -`last_access_control_sync_scheduled_at`:: -(Optional, datetime) The datetime indicating when the last access control sync was scheduled. - -`last_access_control_sync_status`:: -(Optional, ConnectorSyncStatus) The status of the last access control sync. - -`last_deleted_document_count`:: -(Optional, long) The number of documents deleted in the last sync process. 
- -`last_incremental_sync_scheduled_at`:: -(Optional, datetime) The datetime when the last incremental sync was scheduled. - -`last_indexed_document_count`:: -(Optional, long) The number of documents indexed in the last sync. - -`last_sync_error`:: -(Optional, string) The last error message encountered during a sync process, if any. - -`last_sync_scheduled_at`:: -(Optional, datetime) The datetime when the last sync was scheduled. - -`last_sync_status`:: -(Optional, ConnectorSyncStatus) The status of the last sync. - -`last_synced`:: -(Optional, datetime) The datetime of the last successful synchronization. - - -The value of `ConnectorSyncStatus` is one of the following lowercase strings representing different sync states: - -* `canceling`: The sync process is in the process of being canceled. -* `canceled`: The sync process has been canceled. -* `completed`: The sync process completed successfully. -* `error`: An error occurred during the sync process. -* `in_progress`: The sync process is currently underway. -* `pending`: The sync is pending and has not yet started. -* `suspended`: The sync process has been temporarily suspended. - - -[[update-connector-last-sync-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector last sync stats were successfully updated. - -`400`:: -The `connector_id` was not provided or the request payload was malformed. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[update-connector-last-sync-api-example]] -==== {api-examples-title} - -The following example updates the last sync stats for the connector with ID `my-connector`: - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "service_type": "google_drive" -} --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -PUT _connector/my-connector/_last_sync -{ - "last_access_control_sync_error": "Houston, we have a problem!", - "last_access_control_sync_scheduled_at": "2023-11-09T15:13:08.231Z", - "last_access_control_sync_status": "pending", - "last_deleted_document_count": 42, - "last_incremental_sync_scheduled_at": "2023-11-09T15:13:08.231Z", - "last_indexed_document_count": 42, - "last_sync_error": "Houston, we have a problem!", - "last_sync_scheduled_at": "2024-11-09T15:13:08.231Z", - "last_sync_status": "completed", - "last_synced": "2024-11-09T15:13:08.231Z" -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- diff --git a/docs/reference/connector/apis/update-connector-name-description-api.asciidoc b/docs/reference/connector/apis/update-connector-name-description-api.asciidoc deleted file mode 100644 index c0d0568baef35..0000000000000 --- a/docs/reference/connector/apis/update-connector-name-description-api.asciidoc +++ /dev/null @@ -1,98 +0,0 @@ -[[update-connector-name-description-api]] -=== Update connector name and description API -++++ -Update connector name and description -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Updates the `name` and `description` fields of a connector. - -To get started with Connector APIs, check out <>. 
- - -[[update-connector-name-description-api-request]] -==== {api-request-title} - -`PUT _connector//_name` - -[[update-connector-name-description-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_id` parameter should reference an existing connector. - -[[update-connector-name-description-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[update-connector-name-description-api-request-body]] -==== {api-request-body-title} - -`name`:: -(Required, string) Name of the connector. - -`description`:: -(Optional, string) Description of the connector. - - -[[update-connector-name-description-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector `name` and `description` fields were successfully updated. - -`400`:: -The `connector_id` was not provided or the request payload was malformed. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[update-connector-name-description-api-example]] -==== {api-examples-title} - -The following example updates the `name` and `description` fields for the connector with ID `my-connector`: - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "service_type": "google_drive" -} --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -PUT _connector/my-connector/_name -{ - "name": "Custom connector", - "description": "This is my customized connector" -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- diff --git a/docs/reference/connector/apis/update-connector-pipeline-api.asciidoc b/docs/reference/connector/apis/update-connector-pipeline-api.asciidoc deleted file mode 100644 index a886fe6f20da7..0000000000000 --- a/docs/reference/connector/apis/update-connector-pipeline-api.asciidoc +++ /dev/null @@ -1,111 +0,0 @@ -[[update-connector-pipeline-api]] -=== Update connector pipeline API -++++ -Update connector pipeline -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Updates the `pipeline` configuration of a connector. - -When you create a new connector, the configuration of an <> is populated with default settings. - -To get started with Connector APIs, check out <>. - - -[[update-connector-pipeline-api-request]] -==== {api-request-title} - -`PUT _connector//_pipeline` - -[[update-connector-pipeline-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_id` parameter should reference an existing connector. - -[[update-connector-pipeline-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[update-connector-pipeline-api-request-body]] -==== {api-request-body-title} - -`pipeline`:: -(Required, object) The pipeline configuration of the connector. 
The pipeline determines how data is processed during ingestion into Elasticsearch. - -Pipeline configuration must include the following attributes: - -- `extract_binary_content` (Required, boolean) A flag indicating whether to extract binary content during ingestion. - -- `name` (Required, string) The name of the ingest pipeline. - -- `reduce_whitespace` (Required, boolean) A flag indicating whether to reduce extra whitespace in the ingested content. - -- `run_ml_inference` (Required, boolean) A flag indicating whether to run machine learning inference on the ingested content. - - -[[update-connector-pipeline-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector `pipeline` field was successfully updated. - -`400`:: -The `connector_id` was not provided or the request payload was malformed. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[update-connector-pipeline-api-example]] -==== {api-examples-title} - -The following example updates the `pipeline` property for the connector with ID `my-connector`: - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "service_type": "google_drive" -} --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -PUT _connector/my-connector/_pipeline -{ - "pipeline": { - "extract_binary_content": true, - "name": "my-connector-pipeline", - "reduce_whitespace": true, - "run_ml_inference": true - } -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- diff --git a/docs/reference/connector/apis/update-connector-scheduling-api.asciidoc b/docs/reference/connector/apis/update-connector-scheduling-api.asciidoc deleted file mode 100644 index eed3d14ea1d97..0000000000000 --- a/docs/reference/connector/apis/update-connector-scheduling-api.asciidoc +++ /dev/null @@ -1,143 +0,0 @@ -[[update-connector-scheduling-api]] -=== Update connector scheduling API -++++ -Update connector scheduling -++++ - -beta::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Updates the `scheduling` configuration of a connector. - -To get started with Connector APIs, check out <>. - - -[[update-connector-scheduling-api-request]] -==== {api-request-title} - -`PUT _connector//_scheduling` - -[[update-connector-scheduling-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_id` parameter should reference an existing connector. - -[[update-connector-scheduling-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[update-connector-scheduling-api-request-body]] -==== {api-request-body-title} - -`scheduling`:: -(Required, object) The scheduling configuration for the connector. This configuration determines frequency of synchronization operations for the connector. - -The scheduling configuration includes the following attributes, each represented as a `ScheduleConfig` object. 
If the `scheduling` object does not include all schedule types, only those provided will be updated; the others will remain unchanged.
-
-- `access_control` (Optional, `ScheduleConfig` object) Defines the schedule for synchronizing access control settings of the connector.
-
-- `full` (Optional, `ScheduleConfig` object) Defines the schedule for full content syncs.
-
-- `incremental` (Optional, `ScheduleConfig` object) Defines the schedule for incremental content syncs.
-
-Each `ScheduleConfig` object includes the following sub-attributes:
-
-  - `enabled` (Required, boolean) A flag that enables or disables the scheduling.
-
-  - `interval` (Required, string) A CRON expression representing the sync schedule. This expression defines the frequency at which the sync operations should occur. It must be provided in a valid CRON format.
-
-
-[[update-connector-scheduling-api-response-codes]]
-==== {api-response-codes-title}
-
-`200`::
-Connector `scheduling` field was successfully updated.
-
-`400`::
-The `connector_id` was not provided or the request payload was malformed.
-
-`404` (Missing resources)::
-No connector matching `connector_id` could be found.
-
-[[update-connector-scheduling-api-example]]
-==== {api-examples-title}
-
-The following example updates the `scheduling` property for the connector with ID `my-connector`:
-
-////
-[source, console]
---------------------------------------------------
-PUT _connector/my-connector
-{
-  "index_name": "search-google-drive",
-  "name": "My Connector",
-  "service_type": "google_drive"
-}
-
---------------------------------------------------
-// TESTSETUP
-
-[source,console]
---------------------------------------------------
-DELETE _connector/my-connector
---------------------------------------------------
-// TEARDOWN
-////
-
-[source,console]
-----
-PUT _connector/my-connector/_scheduling
-{
-  "scheduling": {
-    "access_control": {
-      "enabled": true,
-      "interval": "0 10 0 * * ?"
-    },
-    "full": {
-      "enabled": true,
-      "interval": "0 20 0 * * ?"
-    },
-    "incremental": {
-      "enabled": false,
-      "interval": "0 30 0 * * ?"
-    }
-  }
-}
-----
-
-[source,console-result]
-----
-{
-  "result": "updated"
-}
-----
-
-The following example updates the `full` sync schedule only; other schedule types remain unchanged:
-
-[source,console]
-----
-PUT _connector/my-connector/_scheduling
-{
-  "scheduling": {
-    "full": {
-      "enabled": true,
-      "interval": "0 10 0 * * ?"
-    }
-  }
-}
-----
-
-[source,console-result]
-----
-{
-  "result": "updated"
-}
-----
diff --git a/docs/reference/connector/apis/update-connector-service-type-api.asciidoc b/docs/reference/connector/apis/update-connector-service-type-api.asciidoc
deleted file mode 100644
index 28b4c72b682d2..0000000000000
--- a/docs/reference/connector/apis/update-connector-service-type-api.asciidoc
+++ /dev/null
@@ -1,96 +0,0 @@
-[[update-connector-service-type-api]]
-=== Update connector service type API
-++++
-Update connector service type
-++++
-
-beta::[]
-
-.New API reference
-[sidebar]
---
-For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs].
---
-
-Updates the `service_type` of a connector.
-
-To get started with Connector APIs, check out <>.
-
-
-[[update-connector-service-type-api-request]]
-==== {api-request-title}
-
-`PUT _connector//_service_type`
-
-[[update-connector-service-type-api-prereq]]
-==== {api-prereq-title}
-
-* To sync data using self-managed connectors, you need to deploy the <> on your own infrastructure.
This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_id` parameter should reference an existing connector. -* The `service_type` must be a valid type as defined by the Connector framework. -** When you change a configured connector's `service_type`, you'll also need to reset its configuration to ensure compatibility. - -[[update-connector-service-type-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[update-connector-service-type-api-request-body]] -==== {api-request-body-title} - -`service_type`:: -(Required, string) A connector service type defined in the https://github.com/elastic/connectors/blob/main/connectors/config.py#L94[Connector framework]. - - -[[update-connector-service-type-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector `service_type` field was successfully updated. - -`400`:: -The `connector_id` was not provided or the request payload was malformed. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[update-connector-service-type-api-example]] -==== {api-examples-title} - -The following example updates the `service_type` of the connector with ID `my-connector`: - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "service_type": "google_drive" -} --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -PUT _connector/my-connector/_service_type -{ - "service_type": "sharepoint_online" -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- diff --git a/docs/reference/connector/apis/update-connector-status-api.asciidoc b/docs/reference/connector/apis/update-connector-status-api.asciidoc deleted file mode 100644 index 7812cbff89d1b..0000000000000 --- a/docs/reference/connector/apis/update-connector-status-api.asciidoc +++ /dev/null @@ -1,94 +0,0 @@ -[[update-connector-status-api]] -=== Update connector status API -++++ -Update connector status -++++ - -preview::[] - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-connector[Connector APIs]. --- - -Updates the `status` of a connector. - -To get started with Connector APIs, check out <>. - - -[[update-connector-status-api-request]] -==== {api-request-title} - -`PUT _connector//_status` - -[[update-connector-status-api-prereq]] -==== {api-prereq-title} - -* To sync data using self-managed connectors, you need to deploy the <>. on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors. -* The `connector_id` parameter should reference an existing connector. -* The change of `status` must be a valid status transition according to the https://github.com/elastic/connectors/blob/main/docs/CONNECTOR_PROTOCOL.md[Connector Protocol]. - -[[update-connector-status-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) - -[role="child_attributes"] -[[update-connector-status-api-request-body]] -==== {api-request-body-title} - -`status`:: -(Required, string) A valid connector status string, defined in the Connector Framework. 
- -[[update-connector-status-api-response-codes]] -==== {api-response-codes-title} - -`200`:: -Connector `status` field was successfully updated. - -`400`:: -The `connector_id` was not provided, the request payload was malformed, or the given status transition is not supported. - -`404` (Missing resources):: -No connector matching `connector_id` could be found. - -[[update-connector-status-api-example]] -==== {api-examples-title} - -The following example updates the `status` of the connector with ID `my-connector`: - -//// -[source, console] --------------------------------------------------- -PUT _connector/my-connector -{ - "index_name": "search-google-drive", - "name": "My Connector", - "service_type": "needs_configuration" -} --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _connector/my-connector --------------------------------------------------- -// TEARDOWN -//// - -[source,console] ----- -PUT _connector/my-connector/_status -{ - "status": "needs_configuration" -} ----- - -[source,console-result] ----- -{ - "result": "updated" -} ----- diff --git a/docs/reference/connector/docs/_connectors-create-client.asciidoc b/docs/reference/connector/docs/_connectors-create-client.asciidoc deleted file mode 100644 index 917777a2ac786..0000000000000 --- a/docs/reference/connector/docs/_connectors-create-client.asciidoc +++ /dev/null @@ -1,76 +0,0 @@ -[discrete#es-connectors-{service-name-stub}-client-create-use-the-ui] -==== Use the UI - -To create a new {service-name} connector: - -. In the Kibana UI, navigate to the *Search -> Content -> Connectors* page from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. -. Follow the instructions to create a new *{service-name}* self-managed connector. - -[discrete#es-connectors-{service-name-stub}-client-create-use-the-api] -==== Use the API - -You can use the {es} {ref}/connector-apis.html[Create connector API] to create a new self-managed {service-name} self-managed connector. - -For example: - -[source, console,subs="+attributes"] ----- -PUT _connector/my-{service-name-stub}-connector -{ - "index_name": "my-elasticsearch-index", - "name": "Content synced from {service-name}", - "service_type": "{service-name-stub}" -} ----- -// TEST[skip:can't test in isolation] - - -.You'll also need to *create an API key* for the connector to use. - - -[%collapsible] -=================================== - -[NOTE] -==== -The user needs the cluster privileges `manage_api_key`, `manage_connector` and `write_connector_secrets` to generate API keys programmatically. -==== - -To create an API key for the connector: - -. Run the following command, replacing values where indicated. -Note the `encoded` return values from the response: -+ -[source, console,subs="+attributes"] ----- -POST /_security/api_key -{ - "name": "connector_name-connector-api-key", - "role_descriptors": { - "connector_name-connector-role": { - "cluster": [ - "monitor", - "manage_connector" - ], - "indices": [ - { - "names": [ - "index_name", - ".search-acl-filter-index_name", - ".elastic-connectors*" - ], - "privileges": [ - "all" - ], - "allow_restricted_indices": false - } - ] - } - } -} ----- -+ -. Update your `config.yml` file with the API key `encoded` value. -=================================== - -Refer to the {ref}/connector-apis.html[{es} API documentation] for details of all available Connector APIs. 
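As an optional check before starting the connector service, you can retrieve the connector document to confirm it was registered. The following is a minimal sketch, not part of the API key steps above; it assumes the connector ID used in the creation example, and a freshly created connector typically reports a `created` or `needs_configuration` status until the service checks in and updates `last_seen`:

[source, console,subs="+attributes"]
----
GET _connector/my-{service-name-stub}-connector
----
// TEST[skip:can't test in isolation]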
diff --git a/docs/reference/connector/docs/_connectors-create-native-api-key.asciidoc b/docs/reference/connector/docs/_connectors-create-native-api-key.asciidoc deleted file mode 100644 index 99fde477eea5b..0000000000000 --- a/docs/reference/connector/docs/_connectors-create-native-api-key.asciidoc +++ /dev/null @@ -1,61 +0,0 @@ -[NOTE] -==== -The user needs the cluster privileges `manage_api_key`, `manage_connector` and `write_connector_secrets` to generate API keys programmatically. -==== - -To create an API key for the connector: - -. Run the following command, replacing values where indicated. -Note the `id` and `encoded` return values from the response: -+ -[source, console,subs="+attributes"] ----- -POST /_security/api_key -{ - "name": "my-connector-api-key", - "role_descriptors": { - "my-connector-connector-role": { - "cluster": [ - "monitor", - "manage_connector" - ], - "indices": [ - { - "names": [ - "my-index_name", - ".search-acl-filter-my-index_name", - ".elastic-connectors*" - ], - "privileges": [ - "all" - ], - "allow_restricted_indices": false - } - ] - } - } -} ----- -+ -. Use the `encoded` value to store a connector secret, and note the `id` return value from this response: -+ -[source, console,subs="+attributes"] ----- -POST _connector/_secret -{ - "value": "encoded_api_key" -} ----- -// TEST[skip:need to retrieve ids from the response] -+ -. Use the API key `id` and the connector secret `id` to update the connector: -+ -[source, console,subs="+attributes"] ----- -PUT /_connector/my_connector_id>/_api_key_id -{ - "api_key_id": "API key_id", - "api_key_secret_id": "secret_id" -} ----- -// TEST[skip:need to retrieve ids from the response] diff --git a/docs/reference/connector/docs/_connectors-create-native.asciidoc b/docs/reference/connector/docs/_connectors-create-native.asciidoc deleted file mode 100644 index 8023fbbe61136..0000000000000 --- a/docs/reference/connector/docs/_connectors-create-native.asciidoc +++ /dev/null @@ -1,38 +0,0 @@ -[discrete#es-connectors-{service-name-stub}-create-use-the-ui] -==== Use the UI - -To create a new {service-name} connector: - -. In the Kibana UI, navigate to the *Search -> Content -> Connectors* page from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. -. Follow the instructions to create a new native *{service-name}* connector. - -For additional operations, see <>. - -[discrete#es-connectors-{service-name-stub}-create-use-the-api] -==== Use the API - -You can use the {es} {ref}/connector-apis.html[Create connector API] to create a new native {service-name} connector. - -For example: - -[source, console,subs="+attributes"] ----- -PUT _connector/my-{service-name-stub}-connector -{ - "index_name": "my-elasticsearch-index", - "name": "Content synced from {service-name}", - "service_type": "{service-name-stub}", - "is_native": true -} ----- -// TEST[skip:can't test in isolation] - -.You'll also need to *create an API key* for the connector to use. - -[%collapsible] -=================================== -include::_connectors-create-native-api-key.asciidoc[] -=================================== - -Refer to the {ref}/connector-apis.html[{es} API documentation] for details of all available Connector APIs. 
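To double-check the native setup, you can list connectors and confirm that the new connector is flagged as native. This is a minimal sketch; it assumes the list connectors API's `index_name` filter is available in your version and reuses the `my-elasticsearch-index` index name from the example above. In the response, the connector document should report `is_native: true`, and `api_key_id` should be populated once the API key steps are completed:

[source, console,subs="+attributes"]
----
GET _connector?index_name=my-elasticsearch-index
----
// TEST[skip:can't test in isolation]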
- diff --git a/docs/reference/connector/docs/_connectors-docker-instructions.asciidoc b/docs/reference/connector/docs/_connectors-docker-instructions.asciidoc deleted file mode 100644 index 4626ea23c2e71..0000000000000 --- a/docs/reference/connector/docs/_connectors-docker-instructions.asciidoc +++ /dev/null @@ -1,76 +0,0 @@ -You can deploy the {service-name} connector as a self-managed connector using Docker. -Follow these instructions. - -.*Step 1: Download sample configuration file* -[%collapsible] -==== -Download the sample configuration file. -You can either download it manually or run the following command: - -[source,sh] ----- -curl https://raw.githubusercontent.com/elastic/connectors/main/config.yml.example --output ~/connectors-config/config.yml ----- -// NOTCONSOLE - -Remember to update the `--output` argument value if your directory name is different, or you want to use a different config file name. -==== - -.*Step 2: Update the configuration file for your self-managed connector* -[%collapsible] -==== -Update the configuration file with the following settings to match your environment: - -* `elasticsearch.host` -* `elasticsearch.api_key` -* `connectors` - -If you're running the connector service against a Dockerized version of Elasticsearch and Kibana, your config file will look like this: - -[source,yaml,subs="attributes"] ----- -# When connecting to your cloud deployment you should edit the host value -elasticsearch.host: http://host.docker.internal:9200 -elasticsearch.api_key: - -connectors: - - - connector_id: - service_type: {service-name-stub} - api_key: # Optional. If not provided, the connector will use the elasticsearch.api_key instead - ----- - -Using the `elasticsearch.api_key` is the recommended authentication method. However, you can also use `elasticsearch.username` and `elasticsearch.password` to authenticate with your Elasticsearch instance. - -Note: You can change other default configurations by simply uncommenting specific settings in the configuration file and modifying their values. - -==== - -.*Step 3: Run the Docker image* -[%collapsible] -==== -Run the Docker image with the Connector Service using the following command: - -[source,sh,subs="attributes"] ----- -docker run \ --v ~/connectors-config:/config \ ---network "elastic" \ ---tty \ ---rm \ -docker.elastic.co/integrations/elastic-connectors:{version}.0 \ -/app/bin/elastic-ingest \ --c /config/config.yml ----- -==== - -Refer to {connectors-python}/docs/DOCKER.md[`DOCKER.md`^] in the `elastic/connectors` repo for more details. - -Find all available Docker images in the https://www.docker.elastic.co/r/integrations/elastic-connectors[official registry]. - -[TIP] -==== -We also have a quickstart self-managed option using Docker Compose, so you can spin up all required services at once: Elasticsearch, Kibana, and the connectors service. -Refer to this https://github.com/elastic/connectors/tree/main/scripts/stack#readme[README] in the `elastic/connectors` repo for more information. 
-==== diff --git a/docs/reference/connector/docs/_connectors-list-advanced-rules.asciidoc b/docs/reference/connector/docs/_connectors-list-advanced-rules.asciidoc deleted file mode 100644 index b09aa7f1d4e7e..0000000000000 --- a/docs/reference/connector/docs/_connectors-list-advanced-rules.asciidoc +++ /dev/null @@ -1,14 +0,0 @@ -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> diff --git a/docs/reference/connector/docs/_connectors-list-clients.asciidoc b/docs/reference/connector/docs/_connectors-list-clients.asciidoc deleted file mode 100644 index b56d7458d2924..0000000000000 --- a/docs/reference/connector/docs/_connectors-list-clients.asciidoc +++ /dev/null @@ -1,29 +0,0 @@ -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> diff --git a/docs/reference/connector/docs/_connectors-list-dls.asciidoc b/docs/reference/connector/docs/_connectors-list-dls.asciidoc deleted file mode 100644 index f5ea6c1d43c5c..0000000000000 --- a/docs/reference/connector/docs/_connectors-list-dls.asciidoc +++ /dev/null @@ -1,13 +0,0 @@ -* <> -* <> -* <> (including Jira Data Center) -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> \ No newline at end of file diff --git a/docs/reference/connector/docs/_connectors-list-incremental.asciidoc b/docs/reference/connector/docs/_connectors-list-incremental.asciidoc deleted file mode 100644 index 88822164258bb..0000000000000 --- a/docs/reference/connector/docs/_connectors-list-incremental.asciidoc +++ /dev/null @@ -1,19 +0,0 @@ -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> \ No newline at end of file diff --git a/docs/reference/connector/docs/_connectors-list-local-content-extraction.asciidoc b/docs/reference/connector/docs/_connectors-list-local-content-extraction.asciidoc deleted file mode 100644 index d86583644d574..0000000000000 --- a/docs/reference/connector/docs/_connectors-list-local-content-extraction.asciidoc +++ /dev/null @@ -1,16 +0,0 @@ -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> diff --git a/docs/reference/connector/docs/_connectors-list-native.asciidoc b/docs/reference/connector/docs/_connectors-list-native.asciidoc deleted file mode 100644 index 9222abe11fc3a..0000000000000 --- a/docs/reference/connector/docs/_connectors-list-native.asciidoc +++ /dev/null @@ -1,26 +0,0 @@ -* <> -* <> -* <> (including Confluence Data Center) -* <> -* <> -* <> -* <> -* <> -* <> (including Jira Data Center) -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> \ No newline at end of file diff --git a/docs/reference/connector/docs/_connectors-list-workplace-search.asciidoc b/docs/reference/connector/docs/_connectors-list-workplace-search.asciidoc deleted file mode 100644 index f9391f3bf4a1e..0000000000000 --- a/docs/reference/connector/docs/_connectors-list-workplace-search.asciidoc +++ /dev/null @@ -1,22 +0,0 @@ -* {workplace-search-ref}/workplace-search-box-connector.html[Box^] -* {workplace-search-ref}/workplace-search-confluence-cloud-connector.html[Confluence Cloud^] -* https://github.com/elastic/connectors-ruby/tree/8.3/lib/connectors_sdk/confluence_cloud[Confluence Cloud^] (Ruby connector package) -* {workplace-search-ref}/workplace-search-confluence-server-connector.html[Confluence Server^] -* 
{workplace-search-ref}/workplace-search-dropbox-connector.html[Dropbox^] -* {workplace-search-ref}/workplace-search-github-connector.html[GitHub^] -* {workplace-search-ref}/workplace-search-gmail-connector.html[Gmail^] -* {workplace-search-ref}/workplace-search-google-drive-connector.html[Google Drive^] -* {workplace-search-ref}/workplace-search-jira-cloud-connector.html[Jira Cloud^] -* {workplace-search-ref}/workplace-search-jira-server-connector.html[Jira Server^] -* https://github.com/elastic/enterprise-search-network-drive-connector[Network drives^] (Python connector package) -* {workplace-search-ref}/workplace-search-onedrive-connector.html[OneDrive^] -* https://github.com/elastic/enterprise-search-microsoft-outlook-connector[Outlook^] (Python connector package) -* {workplace-search-ref}/workplace-search-salesforce-connector.html[Salesforce^] -* {workplace-search-ref}/workplace-search-servicenow-connector.html[ServiceNow^] -* {workplace-search-ref}/workplace-search-sharepoint-online-connector.html[SharePoint Online^] -* https://github.com/elastic/connectors-ruby/tree/8.3/lib/connectors_sdk/share_point[SharePoint Online^] (Ruby connector package) -* https://github.com/elastic/enterprise-search-sharepoint-server-connector[SharePoint Server^] (Python connector package) -* {workplace-search-ref}/workplace-search-slack-connector.html[Slack^] -* {workplace-search-ref}/microsoft-teams.html[Teams^] (Python connector package) -* {workplace-search-ref}/workplace-search-zendesk-connector.html[Zendesk^] -* {workplace-search-ref}/zoom.html[Zoom^] (Python connector package) diff --git a/docs/reference/connector/docs/_connectors-overview-table.asciidoc b/docs/reference/connector/docs/_connectors-overview-table.asciidoc deleted file mode 100644 index f5f8103349dde..0000000000000 --- a/docs/reference/connector/docs/_connectors-overview-table.asciidoc +++ /dev/null @@ -1,51 +0,0 @@ -This table provides an overview of our available connectors, their current support status, and the features they support. - -The columns provide specific information about each connector: - -* *Status*: Indicates whether the connector is in General Availability (GA), Technical Preview, Beta, or is an Example connector. -* *Native (Elastic Cloud)*: Specifies the versions in which the connector is available as a managed service, if applicable. -* *Advanced sync rules*: Specifies the versions in which advanced sync rules are supported, if applicable. -* *Local binary extraction service*: Specifies the versions in which the local binary extraction service is supported, if applicable. -* *Incremental syncs*: Specifies the version in which incremental syncs are supported, if applicable. -* *Document level security*: Specifies the version in which document level security is supported, if applicable. -* *Code*: Provides a link to the connector's source code in the https://github.com/elastic/connectors[`elastic/connectors` GitHub repository]. - -NOTE: All connectors are available as self-managed <>. 
- -[options="header"] -|==================================================================================================================================================================================================== -| Connector| Status| Native (Elastic Cloud) | <> | <> | <> | <> | Source code -|<>|*GA*|8.9+|-|8.11+|8.13+|-|https://github.com/elastic/connectors/tree/main/connectors/sources/azure_blob_storage.py[View code] -|<>|*Preview*|8.14+|-|-|8.13+|-|https://github.com/elastic/connectors/tree/main/connectors/sources/box.py[View code] -|<>|*GA*|8.9+|8.9+|8.11+|8.13+|8.10|https://github.com/elastic/connectors/tree/main/connectors/sources/confluence.py[View code] -|<>|*Preview*|8.13+|8.13+|8.13+|8.13+|8.14+|https://github.com/elastic/connectors/tree/main/connectors/sources/confluence.py[View code] -|<>|*GA*|8.9+|8.9+|8.11+|8.13+|8.14+|https://github.com/elastic/connectors/tree/main/connectors/sources/confluence.py[View code] -|<>|*GA*|8.10+|-|8.11+|8.13+|8.12+|https://github.com/elastic/connectors/tree/main/connectors/sources/dropbox.py[View code] -|<>|*GA*|8.11+|8.10+|8.11+|8.13+|8.12+|https://github.com/elastic/connectors/tree/main/connectors/sources/github.py[View code] -|<>|*GA*|8.13+|-|-|8.13+|8.10+|https://github.com/elastic/connectors/tree/main/connectors/sources/gmail.py[View code] -|<>|*GA*|8.12+|-|8.11+|8.13+|-|https://github.com/elastic/connectors/tree/main/connectors/sources/google_cloud_storage.py[View code] -|<>|*GA*|8.11+|-|8.11+|8.13+|8.10+|https://github.com/elastic/connectors/tree/main/connectors/sources/google_drive.py[View code] -|<>|*Preview*|-|-|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/graphql.py[View code] -|<>|*GA*|8.9+|8.9+|8.11+|8.13+|8.10+|https://github.com/elastic/connectors/tree/main/connectors/sources/jira.py[View code] -|<>|*Preview*|8.13+|8.13+|8.13+|8.13+|8.13+*|https://github.com/elastic/connectors/tree/main/connectors/sources/jira.py[View code] -|<>|*GA*|8.9+|8.9+|8.11+|8.13+|-|https://github.com/elastic/connectors/tree/main/connectors/sources/jira.py[View code] -|<>|*GA*|8.8|8.8 native/ 8.12 self-managed|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/mongo.py[View code] -|<>|*GA*|8.8+|8.11+|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/mssql.py[View code] -|<>|*GA*|8.5+|8.8+|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/mysql.py[View code] -|<>|*GA*|8.9+|8.10+|8.14+|8.13+|8.11+|https://github.com/elastic/connectors/tree/main/connectors/sources/network_drive.py[View code] -|<>|*GA*|8.14+|8.14+|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/notion.py[View code] -|<>|*GA*|8.11+|8.11+|8.11+|8.13+|8.11+|https://github.com/elastic/connectors/tree/main/connectors/sources/onedrive.py[View code] -|<>|Example|n/a|n/a|n/a|n/a|-|https://github.com/elastic/connectors/tree/main/connectors/sources/opentext_documentum.py[View code] -|<>|*GA*|8.12+|-|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/oracle.py[View code] -|<>|*GA*|8.13+|-|8.11+|8.13+|8.14+|https://github.com/elastic/connectors/tree/main/connectors/sources/outlook.py[View code] -|<>|*GA*|8.8+|8.11+|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/postgresql.py[View code] 
-|<>|*Preview*|-|-|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/redis.py[View code] -|<>|*GA*|8.12+|8.12+|8.11+|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/s3.py[View code] -|<>|*GA*|8.12+|8.12+|8.11+|8.13+|8.13+|https://github.com/elastic/connectors/tree/main/connectors/sources/salesforce.py[View code] -|<>|*GA*|8.10+|8.10+|8.11+|8.13+|8.13+|https://github.com/elastic/connectors/tree/main/connectors/sources/servicenow.py[View code] -|<>|*GA*|8.9+|8.9+|8.9+|8.9+|8.9+|https://github.com/elastic/connectors/tree/main/connectors/sources/sharepoint_online.py[View code] -|<>|*Beta*|8.15+|-|8.11+|8.13+|8.15+|https://github.com/elastic/connectors/tree/main/connectors/sources/sharepoint_server.py[View code] -|<>|*Preview*|8.14+|-|-|-|-|https://github.com/elastic/connectors/tree/main/connectors/sources/slack.py[View code] -|<>|*Preview*|8.14+|-|-|8.13+|-|https://github.com/elastic/connectors/tree/main/connectors/sources/teams.py[View code] -|<>|*Preview*|8.14+|-|8.11+|8.13+|-|https://github.com/elastic/connectors/tree/main/connectors/sources/zoom.py[View code] -|==================================================================================================================================================================================================== diff --git a/docs/reference/connector/docs/connectors-API-tutorial.asciidoc b/docs/reference/connector/docs/connectors-API-tutorial.asciidoc deleted file mode 100644 index 4118c564e4759..0000000000000 --- a/docs/reference/connector/docs/connectors-API-tutorial.asciidoc +++ /dev/null @@ -1,487 +0,0 @@ -[#es-connectors-tutorial-api] -=== Connector API tutorial -++++ -API tutorial -++++ - -Learn how to set up a self-managed connector using the {ref}/connector-apis.html[{es} Connector APIs]. - -For this example we'll use the connectors-postgresql,PostgreSQL connector to sync data from a PostgreSQL database to {es}. -We'll spin up a simple PostgreSQL instance in Docker with some example data, create a connector, and sync the data to {es}. -You can follow the same steps to set up a connector for another data source. - -[TIP] -==== -This tutorial focuses on running a self-managed connector on your own infrastructure, and managing syncs using the Connector APIs. -See connectors for an overview of how connectors work. - -If you're just getting started with {es}, this tutorial might be a bit advanced. -Refer to {ref}/getting-started.html[quickstart] for a more beginner-friendly introduction to {es}. - -If you're just getting started with connectors, you might want to start in the UI first. -We have two tutorials that focus on managing connectors using the UI: - -* <>. Set up a native MongoDB connector, fully managed in Elastic Cloud. -* <>. Set up a self-managed PostgreSQL connector. -==== - -[discrete#es-connectors-tutorial-api-prerequisites] -==== Prerequisites - -* You should be familiar with how connectors, connectors work, to understand how the API calls relate to the overall connector setup. -* You need to have https://www.docker.com/products/docker-desktop/[Docker Desktop] installed. -* You need to have {es} running, and an API key to access it. -Refer to the next section for details, if you don't have an {es} deployment yet. - -[discrete#es-connectors-tutorial-api-setup-es] -==== Set up {es} - -If you already have an {es} deployment on Elastic Cloud (_Hosted deployment_ or _Serverless project_), you're good to go. 
-To spin up {es} in local dev mode in Docker for testing purposes, open the collapsible section below. - -.*Run local {es} in Docker* -[%collapsible] -=============== - -[source,sh,subs="attributes+"] ----- -docker run -p 9200:9200 -d --name elasticsearch \ - -e "discovery.type=single-node" \ - -e "xpack.security.enabled=false" \ - -e "xpack.security.http.ssl.enabled=false" \ - -e "xpack.license.self_generated.type=trial" \ - docker.elastic.co/elasticsearch/elasticsearch:{version} ----- - -[WARNING] -==== -This {es} setup is for development purposes only. -Never use this configuration in production. -Refer to {ref}/setup.html[Set up {es}] for production-grade installation instructions, including Docker. -==== - -We will use the default password `changeme` for the `elastic` user. For production environments, always ensure your cluster runs with security enabled. - -[source,sh] ----- -export ELASTIC_PASSWORD="changeme" ----- - -Since we run our cluster locally with security disabled, we won't use API keys to authenticate against the {es}. Instead, in each cURL request, we will use the `-u` flag for authentication. - -Let's test that we can access {es}: - -[source,sh] ----- -curl -s -X GET -u elastic:$ELASTIC_PASSWORD http://localhost:9200 ----- -// NOTCONSOLE - -Note: With {es} running locally, you will need to pass the username and password to authenticate against {es} in the configuration file for the connector service. - -=============== - -.Running API calls -**** - -You can run API calls using the https://www.elastic.co/guide/en/kibana/master/console-kibana.html[Dev Tools Console] in Kibana, using `curl` in your terminal, or with our programming language clients. -Our example widget allows you to copy code examples in both Dev Tools Console syntax and curl syntax. -To use curl, you'll need to add authentication headers to your request. - -Here's an example of how to do that. Note that if you want the connector ID to be auto-generated, use the `POST _connector` endpoint. - -[source,sh] ----- -curl -s -X PUT http://localhost:9200/_connector/my-connector-id \ --H "Authorization: APIKey $APIKEY" \ --H "Content-Type: application/json" \ --d '{ - "name": "Music catalog", - "index_name": "music", - "service_type": "postgresql" -}' ----- -// NOTCONSOLE - -Refer to connectors-tutorial-api-create-api-key for instructions on creating an API key. -**** - -[discrete#es-connectors-tutorial-api-setup-postgres] -==== Run PostgreSQL instance in Docker (optional) - -For this tutorial, we'll set up a PostgreSQL instance in Docker with some example data. -Of course, you can *skip this step and use your own existing PostgreSQL instance* if you have one. -Keep in mind that using a different instance might require adjustments to the connector configuration described in the next steps. - -.*Expand* to run simple PostgreSQL instance in Docker and import example data -[%collapsible] -=============== - -Let's launch a PostgreSQL container with a user and password, exposed at port `5432`: - -[source,sh] ----- -docker run --name postgres -e POSTGRES_USER=myuser -e POSTGRES_PASSWORD=mypassword -p 5432:5432 -d postgres ----- - -*Download and import example data* - -Next we need to create a directory to store our example dataset for this tutorial. -In your terminal, run the following command: - -[source,sh] ----- -mkdir -p ~/data ----- - -We will use the https://github.com/lerocha/chinook-database/blob/master/ChinookDatabase/DataSources/Chinook_PostgreSql.sql[Chinook dataset] example data. 
- -Run the following command to download the file to the `~/data` directory: - -[source,sh] ----- -curl -L https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_PostgreSql.sql -o ~/data/Chinook_PostgreSql.sql ----- -// NOTCONSOLE - -Now we need to import the example data into the PostgreSQL container and create the tables. - -Run the following Docker commands to copy our sample data into the container and execute the `psql` script: - -[source,sh] ----- -docker cp ~/data/Chinook_PostgreSql.sql postgres:/ -docker exec -it postgres psql -U myuser -f /Chinook_PostgreSql.sql ----- - -Let's verify that the tables are created correctly in the `chinook` database: - -[source,sh] ----- -docker exec -it postgres psql -U myuser -d chinook -c "\dt" ----- - -The `album` table should contain *347* entries and the `artist` table should contain *275* entries. -=============== - -This tutorial uses a very basic setup. To use advanced functionality such as filtering rules and incremental syncs, enable `track_commit_timestamp` on your PostgreSQL database. Refer to postgresql-connector-client-tutorial for more details. - -Now it's time for the real fun! We'll set up a connector to create a searchable mirror of our PostgreSQL data in {es}. - -[discrete#es-connectors-tutorial-api-create-connector] -==== Create a connector - -We'll use the https://www.elastic.co/guide/en/elasticsearch/reference/master/create-connector-api.html[Create connector API] to create a PostgreSQL connector instance. - -Run the following API call, using the https://www.elastic.co/guide/en/kibana/master/console-kibana.html[Dev Tools Console] or `curl`: - -[source,console] ----- -PUT _connector/my-connector-id -{ - "name": "Music catalog", - "index_name": "music", - "service_type": "postgresql" -} ----- -// TEST[skip:TODO] - -[TIP] -==== -`service_type` refers to the third-party data source you're connecting to. -==== - -Note that we specified the `my-connector-id` ID as a part of the `PUT` request. -We'll need the connector ID to set up and run the connector service locally. - -If you'd prefer to use an autogenerated ID, replace `PUT _connector/my-connector-id` with `POST _connector`. - -[discrete#es-connectors-tutorial-api-deploy-connector] -==== Run connector service - -[NOTE] -==== -The connector service runs automatically in Elastic Cloud, if you're using our managed Elastic managed connectors. -Because we're running a self-managed connector, we need to spin up this service locally. -==== - -Now we'll run the connector service so we can start syncing data from our PostgreSQL instance to {es}. -We'll use the steps outlined in connectors-run-from-docker. - -When running the connectors service on your own infrastructure, you need to provide a configuration file with the following details: - -* Your {es} endpoint (`elasticsearch.host`) -* An {es} API key (`elasticsearch.api_key`) -* Your third-party data source type (`service_type`) -* Your connector ID (`connector_id`) - -[discrete#es-connectors-tutorial-api-create-api-key] -===== Create an API key - -If you haven't already created an API key to access {es}, you can use the {ref}/security-api-create-api-key.html[_security/api_key] endpoint. - -Here, we assume your target {es} index name is `music`. If you use a different index name, adjust the request body accordingly. 
- -[source,console] ----- -POST /_security/api_key -{ - "name": "music-connector", - "role_descriptors": { - "music-connector-role": { - "cluster": [ - "monitor", - "manage_connector" - ], - "indices": [ - { - "names": [ - "music", - ".search-acl-filter-music", - ".elastic-connectors*" - ], - "privileges": [ - "all" - ], - "allow_restricted_indices": false - } - ] - } - } -} ----- -// TEST[skip:TODO] - -You'll need to use the `encoded` value from the response as the `elasticsearch.api_key` in your configuration file. - -[TIP] -==== -You can also create an API key in the {kib} and Serverless UIs. -==== - -[discrete#es-connectors-tutorial-api-prepare-configuration-file] -===== Prepare the configuration file - -Let's create a directory and a `config.yml` file to store the connector configuration: - -[source,sh] ----- -mkdir -p ~/connectors-config -touch ~/connectors-config/config.yml ----- - -Now, let's add our connector details to the config file. -Open `config.yml` and paste the following configuration, replacing placeholders with your own values: - -[source,yaml] ----- -elasticsearch.host: # Your Elasticsearch endpoint -elasticsearch.api_key: # Your Elasticsearch API key - -connectors: - - connector_id: "my-connector-id" - service_type: "postgresql" ----- - -We provide an https://raw.githubusercontent.com/elastic/connectors/main/config.yml.example[example configuration file] in the `elastic/connectors` repository for reference. - -[discrete#es-connectors-tutorial-api-run-connector-service] -===== Run the connector service - -Now that we have the configuration file set up, we can run the connector service locally. -This will point your connector instance at your {es} deployment. - -Run the following Docker command to start the connector service: - -[source,sh,subs="attributes+"] ----- -docker run \ --v "$HOME/connectors-config:/config" \ ---rm \ ---tty -i \ ---network host \ -docker.elastic.co/integrations/elastic-connectors:{version}.0 \ -/app/bin/elastic-ingest \ --c /config/config.yml ----- - -Verify your connector is connected by getting the connector status (should be `needs_configuration`) and `last_seen` field (note that time is reported in UTC). -The `last_seen` field indicates that the connector successfully connected to {es}. - -[source, console] ----- -GET _connector/my-connector-id ----- -// TEST[skip:TODO] - -[discrete#es-connectors-tutorial-api-update-connector-configuration] -==== Configure connector - -Now our connector instance is up and running, but it doesn't yet know _where_ to sync data from. -The final piece of the puzzle is to configure our connector with details about our PostgreSQL instance. -When setting up a connector in the Elastic Cloud or Serverless UIs, you're prompted to add these details in the user interface. - -But because this tutorial is all about working with connectors _programmatically_, we'll use the {ref}/update-connector-configuration-api.html[Update connector configuration API] to add our configuration details. - -[TIP] -==== -Before configuring the connector, ensure that the configuration schema is registered by the service. -For Elastic managed connectors, this occurs shortly after creation via the API. -For self-managed connectors, the schema registers on service startup (once the `config.yml` is populated). - -Configuration updates via the API are possible only _after schema registration_. -Verify this by checking the configuration property returned by the `GET _connector/my-connector-id` request. -It should be non-empty. 
-==== - -Run the following API call to configure the connector with our connectors-postgresql-client-configuration,PostgreSQL configuration details: - -[source, console] ----- -PUT _connector/my-connector-id/_configuration -{ - "values": { - "host": "127.0.0.1", - "port": 5432, - "username": "myuser", - "password": "mypassword", - "database": "chinook", - "schema": "public", - "tables": "album,artist" - } -} ----- -// TEST[skip:TODO] - -[NOTE] -==== -Configuration details are specific to the connector type. -The keys and values will differ depending on which third-party data source you're connecting to. -Refer to the individual connectors-references,connector references for these configuration details. -==== - -[discrete#es-connectors-tutorial-api-sync] -==== Sync data - -[NOTE] -==== -We're using a self-managed connector in this tutorial. -To use these APIs with an Elastic managed connector, there's some extra setup for API keys. -Refer to <> for details. -==== - -We're now ready to sync our PostgreSQL data to {es}. -Run the following API call to start a full sync job: - -[source, console] ----- -POST _connector/_sync_job -{ - "id": "my-connector-id", - "job_type": "full" -} ----- -// TEST[skip:TODO] - -To store data in {es}, the connector needs to create an index. -When we created the connector, we specified the `music` index. -The connector will create and configure this {es} index before launching the sync job. - -[TIP] -==== -In the approach we've used here, the connector will use {ref}/mapping.html#mapping-dynamic[dynamic mappings] to automatically infer the data types of your fields. -In a real-world scenario you would use the {es} {ref}/indices-create-index.html[Create index API] to first create the index with the desired field mappings and index settings. -Defining your own mappings upfront gives you more control over how your data is indexed. -==== - -[discrete#es-connectors-tutorial-api-check-sync-status] -===== Check sync status - -Use the {ref}/get-connector-sync-job-api.html[Get sync job API] to track the status and progress of the sync job. -By default, the most recent job statuses are returned first. -Run the following API call to check the status of the sync job: - -[source, console] ----- -GET _connector/_sync_job?connector_id=my-connector-id&size=1 ----- -// TEST[skip:TODO] - -The job document will be updated as the sync progresses, you can check it as often as you'd like to poll for updates. - -Once the job completes, the status should be `completed` and `indexed_document_count` should be *622*. - -Verify that data is present in the `music` index with the following API call: - -[source, console] ----- -GET music/_count ----- -// TEST[skip:TODO] - -{es} stores data in documents, which are JSON objects. -List the individual documents with the following API call: - -[source, console] ----- -GET music/_search ----- -// TEST[skip:TODO] - -[discrete#es-connectors-tutorial-api-troubleshooting] -=== Troubleshooting - -Use the following command to inspect the latest sync job's status: - -[source, console] ----- -GET _connector/_sync_job?connector_id=my-connector-id&size=1 ----- -// TEST[skip:TODO] - -If the connector encountered any errors during the sync, you'll find these in the `error` field. 
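Sync problems can also surface on the connector document itself rather than only on individual sync jobs. As a complementary check, you can retrieve the connector and inspect its `status`, `last_sync_status`, and `last_sync_error` fields, the connector-level counterparts of the sync job's `error` field. A minimal sketch using the connector ID from this tutorial:

[source, console]
----
GET _connector/my-connector-id
----
// TEST[skip:TODO]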
-
-[discrete#es-connectors-tutorial-api-cleanup]
-==== Cleaning up
-
-To delete the connector and its associated sync jobs, run this command:
-
-[source, console]
-----
-DELETE _connector/my-connector-id?delete_sync_jobs=true
-----
-// TEST[skip:TODO]
-
-This won't delete the Elasticsearch index that was created by the connector to store the data.
-Delete the `music` index by running the following command:
-
-[source, console]
-----
-DELETE music
-----
-// TEST[skip:TODO]
-
-To remove the PostgreSQL container, run the following commands:
-
-[source,sh]
-----
-docker stop postgres
-docker rm postgres
-----
-
-To remove the connector service, run the following commands:
-
-[source,sh]
-----
-docker stop <container_id>
-docker rm <container_id>
-----
-
-[discrete#es-connectors-tutorial-api-next-steps]
-==== Next steps
-
-Congratulations! You've successfully set up a self-managed connector using the Connector APIs.
-
-Here are some next steps to explore:
-
-* Learn more about the {ref}/connector-apis.html[Connector APIs].
-* Learn how to deploy {es}, {kib}, and the connectors service using Docker Compose in our https://github.com/elastic/connectors/tree/main/scripts/stack#readme[quickstart guide].
diff --git a/docs/reference/connector/docs/connectors-APIs.asciidoc b/docs/reference/connector/docs/connectors-APIs.asciidoc
deleted file mode 100644
index ad6ad78353fc0..0000000000000
--- a/docs/reference/connector/docs/connectors-APIs.asciidoc
+++ /dev/null
@@ -1,22 +0,0 @@
-[#es-connectors-apis]
-== Connector APIs
-
-In 8.12 we introduced a set of APIs to create and manage Elastic connectors and sync jobs.
-Learn more in the {es} REST API documentation:
-
-* {ref}/connector-apis.html[Connector APIs]
-* {ref}/connector-apis.html#sync-job-apis[Sync job APIs]
-
-.Connector API tutorial
-[sidebar]
---
-Check out this concrete <> to get started with the Connector APIs.
---
-
-[discrete#es-connectors-apis-cli]
-=== Command-line interface
-
-We also provide a command-line interface.
-Learn more in the https://github.com/elastic/connectors/blob/main/docs/CLI.md[`elastic/connectors` repo].
-
-Use these tools if you'd like to work with connectors and sync jobs programmatically, without using the UI.
\ No newline at end of file
diff --git a/docs/reference/connector/docs/connectors-architecture.asciidoc b/docs/reference/connector/docs/connectors-architecture.asciidoc
deleted file mode 100644
index 3269f2badb308..0000000000000
--- a/docs/reference/connector/docs/connectors-architecture.asciidoc
+++ /dev/null
@@ -1,45 +0,0 @@
-[#es-connectors-overview-architecture]
-=== Internal knowledge search architecture
-++++
-Internal knowledge search
-++++
-
-The following section provides a high-level overview of common architecture approaches for the internal knowledge search use case (AKA workplace search).
-
-[discrete#es-connectors-overview-architecture-hybrid]
-==== Hybrid architecture
-
-Data is synced to an Elastic Cloud deployment through managed connectors and/or self-managed connectors.
-A self-managed search application exposes the relevant data that your end users are authorized to see in a search experience.
- -Summary: - -* The best combination in terms of flexibility and out-of-the box functionality -* Integrates with Elastic Cloud hosted managed connectors to bring data to Elasticsearch with minimal operational overhead -* Self-managed connectors allow enterprises to adhere to strict access policies when using firewalls that don't allow incoming connections to data sources, while outgoing traffic is easier to control -* Provides additional functionality available for self-managed connectors such as the <> -* Basic functionality available for Standard licenses, advanced features for Platinum licenses - -The following diagram provides a high-level overview of the hybrid internal knowledge search architecture. - -[.screenshot] -image::images/hybrid-architecture.png[align="center",width="50%"] - -[discrete#es-connectors-overview-architecture-self-managed] -==== Self-managed architecture - -Data is synced to an Elastic deployment through self-managed connectors. -A self-managed search application exposes the relevant data that your end users are authorized to see in a search experience. - -Summary: - -* Gives flexibility to build custom solutions tailored to specific business requirements and internal processes -* Allows enterprises to adhere to strict access policies when using firewalls that don't allow incoming connections to data sources, while outgoing traffic is easier to control -* Provides additional functionality available for self-managed connectors such as the <> -* Feasible for air-gapped environments -* Requires Platinum license for full spectrum of features and self-managed connectors - -The following diagram provides a high-level overview of the self-managed internal knowledge search architecture. - -[.screenshot] -image::images/self-managed-architecture.png[align="center",width="50%"] \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-azure-blob.asciidoc b/docs/reference/connector/docs/connectors-azure-blob.asciidoc deleted file mode 100644 index 0e0978a423e3c..0000000000000 --- a/docs/reference/connector/docs/connectors-azure-blob.asciidoc +++ /dev/null @@ -1,291 +0,0 @@ -[#es-connectors-azure-blob] -=== Elastic Azure Blob Storage connector reference -++++ -Azure Blob Storage -++++ -// Attributes used in this file -:service-name: Azure Blob Storage -:service-name-stub: azure_blob_storage - -The _Elastic Azure Blob Storage connector_ is a <> for https://azure.microsoft.com/en-us/services/storage/blobs/[Azure Blob Storage^]. - -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. 
-******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-azure-blob-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-azure-blob-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* on Elastic Cloud, as of *8.9.1*. - -To use this connector natively in Elastic Cloud, satisfy all <>. - -[discrete#es-connectors-azure-blob-compatability] -===== Compatibility - -This connector has not been tested with Azure Government. -Therefore we cannot guarantee that it will work with Azure Government endpoints. -For more information on Azure Government compared to Global Azure, refer to the - https://learn.microsoft.com/en-us/azure/azure-government/compare-azure-government-global-azure[official Microsoft documentation^]. - -[discrete#es-connectors-{service-name-stub}-create-native-connector] -===== Create {service-name} connector - -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-azure-blob-usage] -===== Usage - -To use this connector as a *managed connector*, see <>. - -For additional operations, see <>. - -[discrete#es-connectors-azure-blob-configuration] -===== Configuration - -The following configuration fields are required to set up the connector: - -Account name:: -Name of Azure Blob Storage account. - -Account key:: -https://learn.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal[Account key^] for the Azure Blob Storage account. - -Blob endpoint:: -Endpoint for the Blob Service. - -Containers:: -List of containers to index. -`*` will index all containers. - -[discrete#es-connectors-azure-blob-documents-syncs] -===== Documents and syncs - -The connector will fetch all data available in the container. - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permissions are not synced. -**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#es-connectors-azure-blob-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-azure-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are not available for this connector in the present version. -Currently filtering is controlled via ingest pipelines. - -[discrete#es-connectors-azure-blob-content-extraction] -===== Content extraction - -See <>. - -[discrete#es-connectors-azure-blob-known-issues] -===== Known issues - -This connector has the following known issues: - -* *`lease data` and `tier` fields are not updated in Elasticsearch indices* -+ -This is because the blob timestamp is not updated. -Refer to https://github.com/elastic/connectors-python/issues/289[Github issue^]. - -[discrete#es-connectors-azure-blob-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-azure-blob-security] -===== Security - -See <>. 
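As a rough illustration of how these fields can also be set programmatically, the {ref}/update-connector-configuration-api.html[Update connector configuration API] used in the tutorial above accepts the same values. This is a minimal sketch with a hypothetical connector ID and placeholder credentials, using the field keys listed in the self-managed reference below (`account_name`, `account_key`, `blob_endpoint`, `containers`); exact value formats may vary by connector version:

[source, console]
----
PUT _connector/my-azure-blob-connector/_configuration
{
  "values": {
    "account_name": "my-storage-account",
    "account_key": "<account-key>",
    "blob_endpoint": "https://my-storage-account.blob.core.windows.net",
    "containers": "*"
  }
}
----
// TEST[skip:TODO]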
- -View the {connectors-python}/connectors/sources/azure_blob_storage.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_) - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-azure-blob-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-azure-blob-client-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. -This self-managed connector is compatible with Elastic versions *8.6.0+*. -To use this connector, satisfy all <>. - -[discrete#es-connectors-azure-blob-client-compatability] -===== Compatibility - -This connector has not been tested with Azure Government. -Therefore we cannot guarantee that it will work with Azure Government endpoints. -For more information on Azure Government compared to Global Azure, refer to the - https://learn.microsoft.com/en-us/azure/azure-government/compare-azure-government-global-azure[official Microsoft documentation^]. - -[discrete#es-connectors-{service-name-stub}-create-connector-client] -===== Create {service-name} connector - -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-azure-blob-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, see <> -For additional usage operations, see <>. - -[discrete#es-connectors-azure-blob-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/azure_blob_storage.py[connector source code^]. -These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required to set up the connector: - -`account_name`:: -Name of Azure Blob Storage account. - -`account_key`:: -https://learn.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal[Account key^] for the Azure Blob Storage account. - -`blob_endpoint`:: -Endpoint for the Blob Service. - -`containers`:: -List of containers to index. -`*` will index all containers. - -`retry_count`:: -Number of retry attempts after a failed call. -Default value is `3`. - -`concurrent_downloads`:: -Number of concurrent downloads for fetching content. -Default value is `100`. - -`use_text_extraction_service`:: -Requires a separate deployment of the <>. Requires that ingest pipeline settings disable text extraction. -Default value is `False`. - -[discrete#es-connectors-azure-blob-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-azure-blob-client-documents-syncs] -===== Documents and syncs - -The connector will fetch all data available in the container. - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. -* Permissions are not synced. -**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. 
-==== - -[discrete#es-connectors-azure-blob-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-azure-blob-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are not available for this connector in the present version. -Currently filtering is controlled via ingest pipelines. - -[discrete#es-connectors-azure-blob-client-content-extraction] -===== Content extraction - -See <>. - -[discrete#es-connectors-azure-blob-client-testing] -===== End-to-end testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the Azure Blob Storage connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=azure_blob_storage ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=azure_blob_storage DATA_SIZE=small ----- - -[discrete#es-connectors-azure-blob-client-known-issues] -===== Known issues - -This connector has the following known issues: - -* *`lease data` and `tier` fields are not updated in Elasticsearch indices* -+ -This is because the blob timestamp is not updated. -Refer to https://github.com/elastic/connectors/issues/289[Github issue^]. - -[discrete#es-connectors-azure-blob-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-azure-blob-client-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-box.asciidoc b/docs/reference/connector/docs/connectors-box.asciidoc deleted file mode 100644 index 265ccd52059f9..0000000000000 --- a/docs/reference/connector/docs/connectors-box.asciidoc +++ /dev/null @@ -1,379 +0,0 @@ -[#es-connectors-box] -=== Elastic Box connector reference -++++ -Box -++++ -// Attributes used in this file -:service-name: Box -:service-name-stub: box - -Th Box connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/box.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-box-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-box-availability-and-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* as of Elastic version *8.14.0*. - -To use this connector natively in Elastic Cloud, satisfy all <>. - -[NOTE] -==== -This connector is in *technical preview* and is subject to change. -The design and code is less mature than official GA features and is being provided as-is with no warranties. -Technical preview features are not subject to the support SLA of official GA features. 
-==== - -[discrete#es-connectors-box-create-connector-native] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-box-usage] -===== Usage - -To use this connector as a *self-managed connector*, use the *Box* tile from the connectors list OR *Customized connector* workflow. - -For additional operations, see <>. - -[discrete#es-connectors-box-api-authorization] -===== Box API Authorization - -[discrete#es-connectors-box-free-account] -====== Box Free Account - -[discrete#es-connectors-box-create-oauth-custom-app] -*Create Box User Authentication (OAuth 2.0) Custom App* - -You'll need to create an OAuth app in the Box developer console by following these steps: - -1. Register a new app in the https://app.box.com/developers/console[Box dev console] with custom App and select User authentication (OAuth 2.0). -2. Add the URL of the web page in *Redirect URIs*, which is accessible by you. -3. Check "Write all files and folders stored in Box" in Application Scopes. -4. Once the app is created, *Client ID* and *Client secret* values are available in the configuration tab. Keep these handy. - -[discrete#es-connectors-box-connector-generate-a-refresh-token] -*Generate a refresh Token* - -To generate a refresh token, follow these steps: - -1. Go to the following URL, replacing `` with the *Client ID* value saved earlier. -For example: -+ -[source,bash] ----- -https://account.box.com/api/oauth2/authorize?response_type=code&client_id= ----- -+ -2. Grant access to your application. -3. You will now be redirected to the web page that you configured in *Redirect URIs*, and the HTTP response should contain an *authorization code* that you'll use to generate a refresh token. -*Note:* Authorization codes to generate refresh tokens can only be used once and are only valid for 30 seconds. -4. In your terminal, run the following `curl` command, replacing ``, ` and ` with the values you saved earlier: -+ -[source,bash] ----- -curl -i -X POST "https://api.box.com/oauth2/token" \ - -H "Content-Type: application/x-www-form-urlencoded" \ - -d "client_id=" \ - -d "client_secret=" \ - -d "code=" \ - -d "grant_type=authorization_code" ----- -+ -Save the refresh token from the response. You'll need this for the connector configuration. - -[discrete#es-connectors-box-enterprise-account] -====== Box Enterprise Account - -[discrete#es-connectors-box-connector-create-box-server-authentication-client-credentials-grant-custom-app] -*Create Box Server Authentication (Client Credentials Grant) Custom App* - -1. Register a new app in the https://app.box.com/developers/console[Box dev console] with custom App and select Server Authentication (Client Credentials Grant). -2. Check following permissions: -+ -* "Write all files and folders stored in Box" in Application Scopes -* "Make API calls using the as-user header" in Advanced Features -3. Select `App + Enterprise Access` in App Access Level. -4. Authorize your application from the admin console. If you do not have permission, you may need to submit the application for authorization. -Save the *Client Credentials* and *Enterprise ID*. You'll need these to configure the connector. - -[discrete#es-connectors-box-configuration] -===== Configuration - -`Box Account` (required):: -Dropdown to determine Box Account type: `Box Free Account` or `Box Enterprise Account`. Default value is `Box Free Account`. - -`Client ID` (required):: -The Client ID to authenticate with Box instance. 
- -`Client Secret` (required):: -The Client Secret to authenticate with Box instance. - -`Refresh Token` (required if Box Account is Box Free):: -The Refresh Token to generate Access Token. -*NOTE:* If the process terminates, you'll need to generate a new refresh token. - -`Enterprise ID` (required if Box Account is Box Enterprise):: -The Enterprise ID to authenticate with Box instance. - -[discrete#es-connectors-box-content-extraction] -===== Content Extraction - -Refer to <>. - -[discrete#es-connectors-box-documents-and-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Files* -* *Folders* - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-box-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-box-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-box-advanced-sync-rules] -===== Advanced Sync Rules - -Advanced sync rules are not available for this connector in the present version. - -[discrete#es-connectors-box-known-issues] -===== Known issues - -There are no known issues for this connector. -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-box-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-box-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-box-connector-client-reference] -==== *Self-managed connector reference* - -.View *self-managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-box-client-availability-and-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. To use this connector, satisfy all <>. - -[NOTE] -==== -This connector is in *technical preview* and is subject to change. -The design and code is less mature than official GA features and is being provided as-is with no warranties. -Technical preview features are not subject to the support SLA of official GA features. -==== - -[discrete#es-connectors-box-client-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-box-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, use the *Box* tile from the connectors list OR *Customized connector* workflow. - -For additional operations, see <>. - -[discrete#es-connectors-box-client-api-authorization] -===== Box API Authorization - -[discrete#es-connectors-box-client-free-account] -====== Box Free Account - -[discrete#es-connectors-box-client-create-oauth-custom-app] -*Create Box User Authentication (OAuth 2.0) Custom App* - -You'll need to create an OAuth app in the Box developer console by following these steps: - -1. Register a new app in the https://app.box.com/developers/console[Box dev console] with custom App and select User authentication (OAuth 2.0). -2. Add the URL of the web page in *Redirect URIs*, which is accessible by you. -3. 
Check "Write all files and folders stored in Box" in Application Scopes. -4. Once the app is created, *Client ID* and *Client secret* values are available in the configuration tab. Keep these handy. - -[discrete#es-connectors-box-client-connector-generate-a-refresh-token] -*Generate a refresh Token* - -To generate a refresh token, follow these steps: - -1. Go to the following URL, replacing `` with the *Client ID* value saved earlier. -For example: -+ -[source,bash] ----- -https://account.box.com/api/oauth2/authorize?response_type=code&client_id= ----- -+ -2. Grant access to your application. -3. You will now be redirected to the web page that you configured in *Redirect URIs*, and the HTTP response should contain an *authorization code* that you'll use to generate a refresh token. -*Note:* Authorization codes to generate refresh tokens can only be used once and are only valid for 30 seconds. -4. In your terminal, run the following `curl` command, replacing ``, ` and ` with the values you saved earlier: -+ -[source,bash] ----- -curl -i -X POST "https://api.box.com/oauth2/token" \ - -H "Content-Type: application/x-www-form-urlencoded" \ - -d "client_id=" \ - -d "client_secret=" \ - -d "code=" \ - -d "grant_type=authorization_code" ----- -+ -Save the refresh token from the response. You'll need this for the connector configuration. - -[discrete#es-connectors-box-client-enterprise-account] -====== Box Enterprise Account - -[discrete#es-connectors-box-client-connector-create-box-server-authentication-client-credentials-grant-custom-app] -*Create Box Server Authentication (Client Credentials Grant) Custom App* - -1. Register a new app in the https://app.box.com/developers/console[Box dev console] with custom App and select Server Authentication (Client Credentials Grant). -2. Check following permissions: -+ -* "Write all files and folders stored in Box" in Application Scopes -* "Make API calls using the as-user header" in Advanced Features -3. Select `App + Enterprise Access` in App Access Level. -4. Authorize your application from the admin console. If you do not have permission, you may need to submit the application for authorization. -Save the *Client Credentials* and *Enterprise ID*. You'll need these to configure the connector. - -[discrete#es-connectors-box-client-configuration] -===== Configuration - -`Box Account` (required):: -Dropdown to determine Box Account type: `Box Free Account` or `Box Enterprise Account`. Default value is `Box Free Account`. - -`Client ID` (required):: -The Client ID to authenticate with Box instance. - -`Client Secret` (required):: -The Client Secret to authenticate with Box instance. - -`Refresh Token` (required if Box Account is Box Free):: -The Refresh Token to generate Access Token. -*NOTE:* If the process terminates, you'll need to generate a new refresh token. - -`Enterprise ID` (required if Box Account is Box Enterprise):: -The Enterprise ID to authenticate with Box instance. - -[discrete#es-connectors-box-client-client-docker] -====== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-box-client-content-extraction] -===== Content Extraction - -Refer to <>. - -[discrete#es-connectors-box-client-documents-and-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Files* -* *Folders* - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. 
*All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-box-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-box-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-box-client-advanced-sync-rules] -===== Advanced Sync Rules - -Advanced sync rules are not available for this connector in the present version. - -[discrete#es-connectors-box-client-end-to-end-testing] -===== End-to-end Testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the Box connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=box ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=box DATA_SIZE=small ----- - -[discrete#es-connectors-box-client-known-issues] -===== Known issues - -There are no known issues for this connector. -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-box-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-box-client-security] -===== Security - -See <>. - - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-confluence.asciidoc b/docs/reference/connector/docs/connectors-confluence.asciidoc deleted file mode 100644 index 61946745195fc..0000000000000 --- a/docs/reference/connector/docs/connectors-confluence.asciidoc +++ /dev/null @@ -1,541 +0,0 @@ -[#es-connectors-confluence] -=== Elastic Confluence connector reference -++++ -Confluence -++++ -// Attributes used in this file -:service-name: Confluence -:service-name-stub: confluence - -The _Elastic Confluence connector_ is a <> for https://www.atlassian.com/software/confluence[Atlassian Confluence^]. -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. -******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-confluence-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-confluence-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* on Elastic Cloud, as of *8.9.1*. - -[NOTE] -==== -Confluence Data Center support was added in 8.13.0 in technical preview and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Technical preview features are not subject to the support SLA of official GA features. -==== - -To use this connector natively in Elastic Cloud, satisfy all <>. 
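For orientation, a Confluence connector can also be created programmatically with the {ref}/connector-apis.html[Connector APIs], mirroring the PostgreSQL tutorial above. This is a minimal sketch; the connector ID, name, and index name below are placeholders, and `confluence` is this connector's service type stub:

[source, console]
----
PUT _connector/my-confluence-connector
{
  "index_name": "confluence-docs",
  "name": "My Confluence connector",
  "service_type": "confluence"
}
----
// TEST[skip:TODO]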
- -[discrete#es-connectors-confluence-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-confluence-usage] -===== Usage - -To use this connector as a *managed connector*, see <>. - -For additional operations, see <>. - -[discrete#es-connectors-confluence-compatability] -===== Compatibility - -* Confluence Cloud or Confluence Server/Data Center *versions 7 or later*. - -[discrete#es-connectors-confluence-configuration] -===== Configuration - -The following configuration fields are required to set up the connector: - -Confluence data source:: -Dropdown to determine the Confluence platform type: `Confluence Cloud`, `Confluence Server`, or `Confluence Data Center`. Default value is `Confluence Server`. - -Confluence Data Center username:: -The username of the account for Confluence Data Center. - -Confluence Data Center password:: -The password of the account to be used for the Confluence Data Center. - -Confluence Server username:: -The username of the account for Confluence server. - -Confluence Server password:: -The password of the account to be used for Confluence Server. - -Confluence Cloud account email:: -The account email for Confluence Cloud. - -Confluence Cloud API token:: -The API Token to authenticate with Confluence cloud. - -Confluence URL label:: -The domain where the Confluence is hosted. Examples: - -* `https://192.158.1.38:8080/` -* `https://test_user.atlassian.net/` - -Confluence space keys:: -Comma-separated list of https://confluence.atlassian.com/doc/space-keys-829076188.html[Space Keys] to fetch data from Confluence server or cloud. If the value is `*`, the connector will fetch data from all spaces present in the configured `spaces`. Default value is `*`. Examples: -+ -* `EC`, `TP` -* `*` - -Enable indexing labels:: -Toggle to enable syncing of labels from pages. -NOTE: This will increase the amount of network calls to the source, and may decrease performance. - -Enable SSL:: -Whether SSL verification will be enabled. Default value is `False`. - -SSL certificate:: -Content of SSL certificate. Note: If `ssl_enabled` is `False`, the value in this field is ignored. Example certificate: -+ -[source, txt] ----- ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -... -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- ----- - -Enable document level security:: -Toggle to enable <>. -When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -Access control syncs will fetch users' access control lists and store them in a separate index. -[NOTE] -==== -To access user data in Jira Administration, the account you created must be granted *Product Access* for Jira Administration. -This access needs to be provided by an administrator from the http://admin.atlassian.com/[Atlassian Admin], and the access level granted should be `Product Admin`. -==== - -[discrete#es-connectors-confluence-documents-syncs] -===== Documents and syncs - -The connector syncs the following Confluence object types: - -* Pages -* Spaces -* Blog Posts -* Attachments - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permissions are not synced by default. -You must first enable <>. -Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. 
-==== - -[discrete#es-connectors-confluence-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-confluence-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -This connector supports <> for remote filtering. -These rules cover complex query-and-filter scenarios that cannot be expressed with = now('-5w')" - }, - { - "query": "lastmodified < startOfYear()" - } -] ----- -// NOTCONSOLE - -*Example 3*: Query for indexing only given types in a *Space* with key 'SD'. - -[source,js] ----- -[ - { - "query": "type in ('page', 'attachment') AND space.key = 'SD'" - } -] ----- -// NOTCONSOLE - -[NOTE] -==== -Syncing recently created/updated items in Confluence may be delayed when using advanced sync rules, because the search endpoint used for CQL queries returns stale results in the response. -For more details refer to the following issue in the https://jira.atlassian.com/browse/CONFCLOUD-73997[Confluence documentation^]. -==== - -[discrete#es-connectors-confluence-document-level-security] -===== Document level security - -[NOTE] -==== -DLS is automatically available for Atlassian Confluence Cloud since 8.9.0. -DLS is available since 8.14.0 for Confluence Server and Confluence Data Center, but requires installing https://marketplace.atlassian.com/apps/1217507/extender-for-confluence?tab=overview&hosting=datacenter[Extender for Confluence]. -==== - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. -Refer to <> on this page for how to enable DLS for this connector. - - -[WARNING] -==== -When the `data_source` is set to Confluence Data Center or Server, the connector will only fetch 1000 users for access control syncs, due a https://auth0.com/docs/manage-users/user-search/retrieve-users-with-get-users-endpoint#limitations[limitation in the API used^]. -==== - -[NOTE] -==== -Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. -The example uses SharePoint Online as the data source, but the same steps apply to every connector. -==== - -[discrete#es-connectors-confluence-content-extraction] -===== Content Extraction - -See <>. - -[discrete#es-connectors-confluence-known-issues] -===== Known issues - -There are currently no known issues for this connector. -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-confluence-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-confluence-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-confluence-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-confluence-client-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a *self-managed connector* using the *Elastic connector framework*. -This self-managed connector is compatible with Elastic versions *8.7.0+*. - -[NOTE] -==== -Confluence Data Center support was added in 8.13.0 in technical preview and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. 
Technical preview features are not subject to the support SLA of official GA features. -==== - -To use this connector, satisfy all <>. - -[discrete#es-connectors-confluence-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-confluence-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, see <> -For additional usage operations, see <>. - -[discrete#es-connectors-confluence-client-compatability] -===== Compatibility - -* Confluence Cloud or Confluence Server/Data Center *versions 7 or later* - -[discrete#es-connectors-confluence-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/confluence.py[connector source code^]. -These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required to set up the connector: - -`data_source`:: -Dropdown to determine the Confluence platform type: `Confluence Cloud`, `Confluence Server`, or `Confluence Data Center`. Default value is `Confluence Server`. - -`data_center_username`:: -The username of the account for Confluence Data Center. - -`data_center_password`:: -The password of the account to be used for the Confluence Data Center. - -`username`:: -The username of the account for Confluence Server. - -`password`:: -The password of the account to be used for the Confluence server. - -`account_email`:: -The account email for the Confluence Cloud. - -`api_token`:: -The API Token to authenticate with Confluence Cloud. - -`confluence_url`:: -The domain where the Confluence instance is hosted. Examples: - -* `https://192.158.1.38:8080/` -* `https://test_user.atlassian.net/` - -`spaces`:: -Comma-separated list of https://confluence.atlassian.com/doc/space-keys-829076188.html[Space Keys] to fetch data from Confluence. If the value is `*`, the connector will fetch data from all spaces present in the configured `spaces`. Default value is `*`. Examples: -+ -* `EC`, `TP` -* `*` - -`index_labels`:: -Toggle to enable syncing of labels from pages. -NOTE: This will increase the amount of network calls to the source, and may decrease performance. - -`ssl_enabled`:: -Whether SSL verification will be enabled. Default value is `False`. - -`ssl_ca`:: -Content of SSL certificate. Note: If `ssl_enabled` is `False`, the value in this field is ignored. Example certificate: -+ -[source, txt] ----- ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -... -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- ----- - -`retry_count`:: -The number of retry attempts after failed request to Confluence. Default value is `3`. - -`concurrent_downloads`:: -The number of concurrent downloads for fetching the attachment content. This speeds up the content extraction of attachments. Defaults to `50`. - -`use_document_level_security`:: -Toggle to enable <>. -+ -When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -Access control syncs will fetch users' access control lists and store them in a separate index. 
-+ -[NOTE] -==== -To access user data in Jira Administration, the account you created must be granted *Product Access* for Jira Administration. -This access needs to be provided by an administrator from the http://admin.atlassian.com/[Atlassian Admin], and the access level granted should be `Product Admin`. -==== - -`use_text_extraction_service`:: -Toggle to enable the local text extraction service. Default value is `False`. -Requires a separate deployment of the Elastic Text Extraction Service. -Requires that ingest pipeline settings disable text extraction. - - -[discrete#es-connectors-confluence-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-confluence-client-documents-syncs] -===== Documents and syncs - -The connector syncs the following Confluence object types: - -* Pages -* Spaces -* Blog Posts -* Attachments - -[NOTE] -==== -* Content of files bigger than 10 MB won't be extracted. -* Permissions are not synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#es-connectors-confluence-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-confluence-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -This connector supports <> for remote filtering. -These rules cover complex query-and-filter scenarios that cannot be expressed with = now('-5w')" - }, - { - "query": "lastmodified < startOfYear()" - } -] ----- -// NOTCONSOLE - -*Example 3*: Query for indexing only given types in a *Space* with key 'SD'. - -[source,js] ----- -[ - { - "query": "type in ('page', 'attachment') AND space.key = 'SD'" - } -] ----- -// NOTCONSOLE - -[NOTE] -==== -Syncing recently created/updated items in Confluence may be delayed when using advanced sync rules, because the search endpoint used for CQL queries returns stale results in the response. -For more details refer to the following issue in the https://jira.atlassian.com/browse/CONFCLOUD-73997[Confluence documentation^]. -==== - -[discrete#es-connectors-confluence-client-document-level-security] -===== Document level security - -[NOTE] -==== -DLS is automatically available for Atlassian Confluence Cloud since 8.9.0. -DLS is available since 8.14.0 for Confluence Server and Confluence Data Center, but requires installing https://marketplace.atlassian.com/apps/1217507/extender-for-confluence?tab=overview&hosting=datacenter[Extender for Confluence]. -==== - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. -Refer to <> on this page for how to enable DLS for this connector. - -[WARNING] -==== -When the `data_source` is set to Confluence Data Center or Server, the connector will only fetch 1000 users for access control syncs, due a https://auth0.com/docs/manage-users/user-search/retrieve-users-with-get-users-endpoint#limitations[limitation in the API used^]. -==== - -[NOTE] -==== -Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. -The example uses SharePoint Online as the data source, but the same steps apply to every connector. -==== - -[discrete#es-connectors-confluence-client-content-extraction] -===== Content Extraction - -See <>. 
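If you run this connector through the standalone service with the `config.yml` layout shown in the PostgreSQL tutorial above, the Confluence instance is registered as one more entry under `connectors`. This is a minimal sketch with a hypothetical connector ID; the source-specific fields documented above are then supplied in Kibana or via the configuration API rather than in this file:

[source,yaml]
----
connectors:
  - connector_id: "my-confluence-connector"
    service_type: "confluence"
----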
- -[discrete#es-connectors-confluence-client-connector-client-operations] -===== Self-managed connector operations - -[discrete#es-connectors-confluence-client-testing] -===== End-to-end testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the Confluence connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=confluence ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=confluence DATA_SIZE=small ----- - -[discrete#es-connectors-confluence-client-known-issues] -===== Known issues - -There are currently no known issues for this connector. -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-confluence-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-confluence-client-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-content-extraction.asciidoc b/docs/reference/connector/docs/connectors-content-extraction.asciidoc deleted file mode 100644 index 744fe1d87cb45..0000000000000 --- a/docs/reference/connector/docs/connectors-content-extraction.asciidoc +++ /dev/null @@ -1,360 +0,0 @@ -[#es-connectors-content-extraction] -=== Content extraction - -Connectors use the {ref}/attachment.html[Elastic ingest attachment processor^] to extract file contents. -The processor extracts files using the https://tika.apache.org[Apache Tika^] text extraction library. -The logic for content extraction is defined in {connectors-python}/connectors/utils.py[utils.py^]. - -While intended primarily for PDF and Microsoft Office formats, you can use any of the <>. - -Enterprise Search uses an {ref}/ingest.html[Elasticsearch ingest pipeline^] to power the web crawler's binary content extraction. -The default pipeline, `search-default-ingestion`, is automatically created when Enterprise Search first starts. - -You can {ref}/ingest.html#create-manage-ingest-pipelines[view^] this pipeline in Kibana. -Customizing your pipeline usage is also an option. -See {ref}/ingest-pipeline-search.html[Ingest pipelines for Search indices]. - -For advanced use cases, the <> can be used to extract content from files larger than 10MB. - -[discrete#es-connectors-content-extraction-supported-file-types] -==== Supported file types - -The following file types are supported: - -* `.txt` -* `.py` -* `.rst` -* `.html` -* `.markdown` -* `.json` -* `.xml` -* `.csv` -* `.md` -* `.ppt` -* `.rtf` -* `.docx` -* `.odt` -* `.xls` -* `.xlsx` -* `.rb` -* `.paper` -* `.sh` -* `.pptx` -* `.pdf` -* `.doc` - -[NOTE] -==== -The ingest attachment processor does not support compressed files, e.g., an archive file containing a set of PDFs. -Expand the archive file and make individual uncompressed files available for the connector to process. -==== - -[discrete#es-connectors-content-extraction-local] -==== Extraction Service - -[NOTE] -==== -Currently, content extraction from large files via the Extraction Service is available for a subset of our **self-managed connectors**. -It is not available for Elastic managed connectors running on Elastic Cloud. -This feature is in *beta*. -==== - -Standard content extraction is done via the Attachment Processor, through Elasticsearch Ingest Pipelines. 
-The self-managed connector limits file sizes for pipeline extraction to 10MB per file (Elasticsearch also has a hard limit of 100MB per file). - -For use cases that require extracting content from files larger than these limits, the *self-managed extraction service* can be used for self-managed connectors. -Instead of sending the file as an `attachment` to Elasticsearch, the file's content is extracted at the edge by the extraction service before ingestion. -The extracted text is then included as the `body` field of a document when it is ingested. - -To use this feature, you will need to do the following: - -* <> -* <> -* Set the value of the configurable field `use_text_extraction_service` to `true` - -[TIP] -==== -The data extraction service code is now available in this public repository: https://github.com/elastic/data-extraction-service. -==== - -[discrete#es-connectors-content-extraction-available-connectors] -===== Available connectors - -Local content extraction is available for the following self-managed connectors: - -include::_connectors-list-local-content-extraction.asciidoc[] - -[discrete#es-connectors-content-extraction-data-extraction-service] -===== Running the extraction service - -Self-hosted content extraction is handled by a *separate* extraction service. - -The versions for the extraction service do not align with the Elastic stack. -For versions after `8.11.x` (including {version}), you should use extraction service version `0.3.x`. - -You can run the service with the following command: - -[source,bash] ----- -$ docker run \ - -p 8090:8090 \ - -it \ - --name extraction-service \ - docker.elastic.co/integrations/data-extraction-service:$EXTRACTION_SERVICE_VERSION ----- - -[discrete#es-connectors-extraction-service-configuration] -===== Configuring the extraction service - -You can enable your self-managed connector to use the self-hosted extraction service by adding the required configuration. -The self-managed connector determines if the extraction service is enabled by the presence of these fields in the configuration file. - -1. Open the `config.yml` configuration file in your text editor of choice. -2. Add the following fields. They can be added anywhere in the file, so long as they begin at the root level. - -[source,yaml] ----- -# data-extraction-service settings -extraction_service: - host: http://localhost:8090 ----- - -[NOTE] -==== -There is no password protection between the self-managed connector and the extraction service. -Self-hosted extraction should only be used if the two services are running on the same network and behind the same firewall. -==== - -[options="header"] -|======= -|Field|Description -|`host`|The endpoint for the extraction service. `http://localhost:8090` can be used if it is running on the same server as your self-managed connector. -|======= - -The self-managed connector will perform a preflight check against the configured `host` value at startup. -The following line will be output to the log if the data extraction service was found and is running normally. - -[source,bash] ----- -Data extraction service found at . ----- - -If you don't see this log at startup, refer to <>. - -[discrete#es-connectors-content-extraction-advanced-configuration] -====== Advanced configuration - -The following fields can be included in the configuration file. -They are optional and will fallback on default values if they are not specified. 
- -[source,yaml] ----- -# data-extraction-service settings -extraction_service: - host: http://localhost:8090 - timeout: 30 - use_file_pointers: false - stream_chunk_size: 65536 - shared_volume_dir: '/app/files' ----- - -[options="header"] -|======= -|Advanced Field|Description -|`timeout`|Timeout limit in seconds for content extraction. Defaults to `30` if not set. Increase this if you have very large files that timeout during content extraction. In the event of a timeout, the indexed document's `body` field will be an empty string. -|`use_file_pointers`|Whether or not to use file pointers instead of sending files to the extraction service. Defaults to `false`. Refer to <> for more details about this setting. -|`stream_chunk_size`|The size that files are chunked to facilitate streaming to extraction service, in bytes. Defaults to 65536 (64 KB). Only applicable if `use_file_pointers` is `false`. Increasing this value may speed up the connector, but will also increase memory usage. -|`shared_volume_dir`|The shared volume from which the data extraction service will extract files. Defaults to `/app/files`. Only applicable if `use_file_pointers` is `true`. -|======= - -[discrete#es-connectors-content-extraction-data-extraction-service-file-pointers] -===== Using file pointers - -The self-hosted extraction service can be set up to use file pointers instead of sending files via HTTP requests. -File pointers are faster than sending files and consume less memory, but require the connector framework and the extraction service to be able to share a file system. -This can be set up with both a dockerized and non-dockerized self-managed connector. - -[discrete#es-connectors-content-extraction-data-extraction-service-file-pointers-configuration] -====== Configuration for non-dockerized self-managed connectors - -If you are running a non-dockerized version of the self-managed connector, you need to determine the local directory where you'll download files for extraction. -This can be anywhere on your file system that you are comfortable using. -Be aware that the self-managed connector will download files with randomized filenames to this directory, so there is a chance that any files already present will be overwritten. -For that reason, we recommend using a dedicated directory for self-hosted extraction. - -[discrete#es-connectors-content-extraction-data-extraction-service-file-pointers-configuration-example] -*Example* - -1. For this example, we will be using `/app/files` as both our local directory and our container directory. -When you run the extraction service docker container, you can mount the directory as a volume using the command-line option `-v /app/files:/app/files`. -+ -[source,bash] ----- -$ docker run \ - -p 8090:8090 \ - -it \ - -v /app/files:/app/files \ - --name extraction-service \ - docker.elastic.co/integrations/data-extraction-service:$EXTRACTION_SERVICE_VERSION ----- -+ -[NOTE] -==== -Due to how this feature works in the codebase for non-dockerized setups, **the local filepath and the docker container's filepath must be identical**. -For example, if using `/app/files`, you must mount the directory as `-v /app/files:/app/files`. -If either directory is different, the self-managed connector will be unable to provide an accurate file pointer for the extraction service. This is not a factor when using a dockerized self-managed connector. -==== -+ -2. Next, before running the self-managed connector, be sure to update the config file with the correct information. 
-+ -[source,yaml] ----- -# data-extraction-service settings -extraction_service: - host: http://localhost:8090 - use_file_pointers: true - shared_volume_dir: '/app/files' ----- -+ -3. Then all that's left is to start the self-managed connector and run a sync. -If you encounter any unexpected errors, refer to <>. - -[discrete#es-connectors-content-extraction-data-extraction-service-file-pointers-configuration-dockerized] -====== Configuration for dockerized self-managed connectors - -When using self-hosted extraction from a dockerized self-managed connector, there are a few extra steps required on top of {connectors-python}/docs/DOCKER.md[running the self-managed connector in docker^]. - -* The self-hosted extraction service will need to share the same network that the self-managed connector and Elasticsearch are sharing. -* The self-managed connector and the extraction service will also need to share a volume. You can decide what directory inside these docker containers the volume will be mounted onto, but the directory must be the same for both docker containers. - -[discrete#es-connectors-content-extraction-data-extraction-service-file-pointers-configuration-dockerized-example] -*Example* - -1. First, set up a volume for the two docker containers to share. -This will be where files are downloaded into and then extracted from. -+ -[source,bash] ----- -$ docker volume create --name extraction-service-volume ----- -+ -2. If you haven't set up a network yet, you can create it now. -+ -[source,bash] ----- -$ docker network create elastic ----- -+ -3. Include the docker volume name and the network as arguments when running the extraction service. -For this example, we will be using `/app/files` as our container directory. -+ -[source,bash] ----- -$ docker run \ - -p 8090:8090 \ - -it \ - -v extraction-service-volume:/app/files \ - --network "elastic" \ - --name extraction-service \ - docker.elastic.co/integrations/data-extraction-service:$EXTRACTION_SERVICE_VERSION ----- -+ -4. Next, you can follow the instructions for {connectors-python}/docs/DOCKER.md[running the self-managed connector in docker^] until step `4. Update the configuration file for your self-managed connector`. -When setting up your configuration, be sure to add the following settings for the self-hosted content extraction service. -Note that the `host` will now refer to an internal docker endpoint instead of localhost. -+ -[source,yaml] ----- -# data-extraction-service settings -extraction_service: - host: http://host.docker.internal:8090 - use_file_pointers: true - shared_volume_dir: '/app/files' ----- -+ -5. Next, during step `5. Run the Docker image`, we only need to add our new shared volume in the run command using `-v extraction-service-volume:/app/files`. -+ -[source,bash] ----- -$ docker run \ - -v ~/connectors-config:/config \ - -v extraction-service-volume:/app/files \ - --network "elastic" \ - --tty \ - --rm \ - docker.elastic.co/integrations/elastic-connectors:$CONNECTOR_CLIENT_VERSION \ - /app/bin/elastic-ingest \ - -c /config/config.yml ----- -+ -6. Now the self-managed connector and extraction service docker containers should be set up to share files. -Run a test sync to make sure everything is configured correctly. -If you encounter any unexpected errors, refer to <>. - -[discrete#es-connectors-content-extraction-local-logs] -===== Self-hosted extraction service logs - -The extraction service produces two different log files that may be informative when troubleshooting. 
-These are saved at the following file locations internally in the docker container: - -* `/var/log/openresty.log` for request traffic logs -* `/var/log/tika.log` for tikaserver jar logs - -Logs can be viewed from outside of docker by combining `docker exec` with the `tail` command. - -[source,bash] ----- -$ docker exec extraction-service /bin/sh -c "tail /var/log/openresty.log" -$ docker exec extraction-service /bin/sh -c "tail /var/log/tika.log" ----- - -[discrete#es-connectors-content-extraction-troubleshooting] -===== Troubleshooting the self-hosted extraction service - -The following warning logs may appear while using self-hosted extraction service. -Each log in this section is followed by a description of what may have happened, and suggested fixes. - -[source,bash] ----- -Extraction service is not configured, skipping its preflight check. ----- - -The configuration file is missing the `extraction_service.host` field. -If you want to use this service, check that the configuration is formatted correctly and that the required field is present. - -[source,bash] ----- -Data extraction service found at , but health-check returned . ----- - -The `/ping` endpoint returned a non-`200` response. -This could mean that the extraction service is unhealthy and may need to be restarted, or that the configured `extraction_service.host` is incorrect. -You can find more information about what happened in the <>. - -[source,bash] ----- -Expected to find a running instance of data extraction service at but failed. . ----- - -The health check returned either a timeout or client connection error. - -* A timeout may be caused by the extraction service server not running, or not being accessible from the configured `host` in the configuration file. -* A server connection error is an internal error on the extraction service. You will need to investigate the <>. - -[source,bash] ----- -Extraction service has been initialised but no extraction service configuration was found. No text will be extracted for this sync. ----- - -You have enabled self-hosted extraction service for the connector, but the configuration file is missing the `extraction_service.host` field. -Check that the configuration is formatted correctly and that the required field is present. - -[source,bash] ----- -Extraction service could not parse . Status: ; : . ----- - -This warning will appear every time a file is not extractable. -Generally the `` will provide an explanation for why extraction failed. -Contact support if the message is unclear. -When a file fails extraction, it will be indexed with an empty string in the `body` field. diff --git a/docs/reference/connector/docs/connectors-content-syncs.asciidoc b/docs/reference/connector/docs/connectors-content-syncs.asciidoc deleted file mode 100644 index 0a2eb54047170..0000000000000 --- a/docs/reference/connector/docs/connectors-content-syncs.asciidoc +++ /dev/null @@ -1,64 +0,0 @@ -[#es-connectors-sync-types] -== Content syncs - -Elastic connectors have two types of content syncs: - -* <> -* <> - -[discrete#es-connectors-sync-types-full] -=== Full syncs - -[NOTE] -==== -We recommend running a full sync whenever <> are modified -==== - -A full sync syncs all documents in the third-party data source into {es}. - -It also deletes any documents in {es}, which no longer exist in the third-party data source. - -A full sync, by definition, takes longer than an incremental sync but it ensures full data consistency. - -A full sync is available for all connectors. 
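-
-For illustration only, a full sync can also be created programmatically with the connector sync job API available in recent {es} versions.
-The connector ID `my-connector` below is a hypothetical example, not a value taken from this document.
-
-[source,console]
-----
-POST _connector/_sync_job
-{
-  "id": "my-connector",
-  "job_type": "full"
-}
-----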
-
-You can <> or <> a full sync job.
-
-[discrete#es-connectors-sync-types-incremental]
-=== Incremental syncs
-
-An incremental sync only syncs data changes since the last full or incremental sync.
-
-Incremental syncs are only available after an initial full sync has successfully completed.
-Otherwise, the incremental sync will fail.
-
-You can <> or <> an incremental sync job.
-
-[discrete#es-connectors-sync-types-incremental-performance]
-==== Incremental sync performance
-
-During an incremental sync, your connector will still _fetch_ all data from the third-party data source.
-If data contains timestamps, the connector framework compares document IDs and timestamps.
-If a document already exists in {es} with the same timestamp, then this document does not need updating and will not be sent to {es}.
-
-The determining factor in incremental sync performance is the raw volume of data ingested.
-For small volumes of data, the performance improvement using incremental syncs will be negligible.
-For large volumes of data, the performance improvement can be substantial.
-Additionally, an incremental sync is less likely to be throttled by {es}, making it more performant than a full sync when {es} is under heavy load.
-
-A third-party data source that has throttling and low throughput, but stores very little data in Elasticsearch, such as GitHub, Jira, or Confluence, won't see a significant performance improvement from incremental syncs.
-
-However, a fast, accessible third-party data source that stores huge amounts of data in {es}, such as Azure Blob Storage, Google Drive, or S3, can see a significant performance improvement from incremental syncs.
-
-[NOTE]
-====
-Incremental syncs for <> and <> connectors use specific logic.
-All other connectors use the same shared connector framework logic for incremental syncs.
-====
-
-[discrete#es-connectors-sync-types-incremental-supported]
-==== Incremental sync availability
-
-Incremental syncs are available for the following connectors:
-
-include::_connectors-list-incremental.asciidoc[]
diff --git a/docs/reference/connector/docs/connectors-docker-compose-quickstart.asciidoc b/docs/reference/connector/docs/connectors-docker-compose-quickstart.asciidoc
deleted file mode 100644
index 52a8921c90ec1..0000000000000
--- a/docs/reference/connector/docs/connectors-docker-compose-quickstart.asciidoc
+++ /dev/null
@@ -1,10 +0,0 @@
-[#es-connectors-docker-compose-quickstart]
-=== Docker Compose quickstart
-
-Use our Docker Compose quickstart to easily set up a full self-managed stack and try out Elastic Connectors.
-
-We've provided a script to start up and run Elasticsearch, Kibana, and Connectors instances using Docker Compose.
-The script prompts you to configure your Connectors before starting.
-Additionally, you can use the same set of scripts to manually configure your Connectors and run the stack.
-
-Refer to the https://github.com/elastic/connectors/tree/main/scripts/stack#readme[README^] in the https://github.com/elastic/connectors[elastic/connectors^] source repository for more information.
diff --git a/docs/reference/connector/docs/connectors-dropbox.asciidoc b/docs/reference/connector/docs/connectors-dropbox.asciidoc deleted file mode 100644 index 295b7e2936625..0000000000000 --- a/docs/reference/connector/docs/connectors-dropbox.asciidoc +++ /dev/null @@ -1,580 +0,0 @@ -[#es-connectors-dropbox] -=== Elastic Dropbox connector reference -++++ -Dropbox -++++ -// Attributes used in this file -:service-name: Dropbox -:service-name-stub: dropbox - -The _Elastic Dropbox connector_ is a <> for https://www.dropbox.com[Dropbox^]. -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. -******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-dropbox-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-dropbox-availability-and-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* in Elastic versions *8.10.0 and later*. - -To use this connector natively in Elastic Cloud, satisfy all <>. - -[discrete#es-connectors-dropbox-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-dropbox-usage] -===== Usage - -To use this connector as a *managed connector*, see <>. - -For additional operations, see <>. - -Before you can configure your connector, you'll need to: - -* <> -* <> - -[discrete#es-connectors-dropbox-dropbox-api-authorization] -===== Dropbox API Authorization - -[discrete#es-connectors-dropbox-create-dropbox-oauth-app] -====== Create Dropbox OAuth App - -You'll need to create an OAuth app in the Dropbox platform by following these steps: - -1. Register a new app in the https://www.dropbox.com/developers/apps[Dropbox App Console^]. -Select *Full Dropbox API app* and choose the following required permissions: -* `files.content.read` -* `sharing.read` -+ -To use document level security, you'll also need the following permissions: -* `team_info.read` -* `team_data.member` -* `team_data.content.read` -* `members.read` -2. Once the app is created, make note of the *app key* and *app secret* values which you'll need to configure the Dropbox connector on your Elastic deployment. - -[discrete#es-connectors-dropbox-refresh-token] -====== Generate a refresh Token - -To generate a refresh token, follow these steps: - -1. Go to the following URL, replacing `` with the *app key* value saved earlier: -`https://www.dropbox.com/oauth2/authorize?client_id=&response_type=code&token_access_type=offline` -+ -The HTTP response should contain an *authorization code* that you'll use to generate a refresh token. -An authorization code *can only be used once* to create a refresh token. -+ -2. 
In your terminal, run the following `cURL` command, replacing ``, `:` with the values you saved earlier: -+ -[source,shell] ----- -curl -X POST "https://api.dropboxapi.com/oauth2/token?code=&grant_type=authorization_code" -u ":" ----- -// NOTCONSOLE -Store the refresh token from the response to be used in the connector configuration. -+ -Make sure the response has a list of the following scopes: -+ -* `account_info.read` -* `files.content.read` -* `files.metadata.read` -* `sharing.read` -* `team_info.read` (if using document level security) -* `team_data.member` (if using document level security) -* `team_data.content.read` (if using document level security) -* `members.read` (if using document level security) - -[discrete#es-connectors-dropbox-configuration] -===== Configuration - -The following configuration fields are required to set up the connector: - -Path to fetch files/folders:: -The folder path to fetch files/folders from Dropbox. Default value is `/`. -+ -[Note] -==== -This field can be bypassed by advanced sync rules. -==== - -App key:: -The App Key to authenticate your Dropbox application. - -App secret:: -The App Secret to authenticate your Dropbox application. - -Refresh token:: -The refresh token to authenticate your Dropbox application. - -Enable document level security:: -Toggle to enable <>. -When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -Access control syncs will fetch users' access control lists and store them in a separate index. - -Include groups and inherited users:: -Appears when document level security is enabled. -Include groups and inherited users when indexing permissions. - -[WARNING] -==== -Enabling `Include groups and inherited users` will cause a signficant performance degradation. -==== - -[discrete#es-connectors-dropbox-documents-and-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Files* -** Includes metadata such as file name, path, size, content, etc. -* *Folders* - -[NOTE] -==== -Due to a Dropbox issue, metadata updates to Paper files from Dropbox Paper are not immediately reflected in the Dropbox UI. -This delays the availability of updated results for the connector. -Once the metadata changes are visible in the Dropbox UI, the updates are available. -==== - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Currently, the connector doesn't retrieve files from shared Team folders. -* Permissions are not synced by default. If <> is not enabled *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-dropbox-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-dropbox-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-dropbox-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -The following section describes <> for this connector. - -Advanced sync rules for Dropbox allow you to sync Dropbox files based on a query that matches strings in the filename. -You can optionally filter the results of the query by `file_extensions` or `file_categories`. 
-When both are provided, priority is given to `file_categories`. -We have some examples below for illustration. - -[discrete#es-connectors-dropbox-sync-rules-advanced-example-1] -*Example: Query only* - -[source,js] ----- -[ - { - "query": "confidential" - }, - { - "query": "dropbox" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-dropbox-sync-rules-advanced-example-2] -*Example: Query with file extension filter* - -[source,js] ----- -[ - { - "query": "dropbox", - "options": { - "file_extensions": [ - "txt", - "pdf" - ] - } - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-dropbox-sync-rules-advanced-example-3] -*Example: Query with file category filter* - -[source,js] ----- -[ - { - "query": "test", - "options": { - "file_categories": [ - { - ".tag": "paper" - }, - { - ".tag": "png" - } - ] - } - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-dropbox-sync-rules-advanced-limitations] -*Limitations* - -* Content extraction is not supported for Dropbox *Paper* files when advanced sync rules are enabled. - -[discrete#es-connectors-dropbox-known-issues] -===== Known issues - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-dropbox-troubleshooting] -===== Troubleshooting - -See <> for a list of troubleshooting tips for all connectors. - -[discrete#es-connectors-dropbox-security] -===== Security - -See <> for a list of security tips for all connectors. - -[discrete#es-connectors-dropbox-content-extraction] -===== Content extraction - -See <>. -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-dropbox-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-dropbox-client-availability-and-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. - -This self-managed connector is compatible with Elastic versions *8.9.0*+. - -To use this connector, satisfy all <>. - -[discrete#es-connectors-dropbox-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-dropbox-client-usage] -===== Usage -Before you can configure your connector, you'll need to: - -* <> -* <> - - -To use this connector as a *self-managed connector*, see <> -Once set up, for additional usage operations, see <>. - -[discrete#es-connectors-dropbox-client-dropbox-api-authorization] -===== Dropbox API Authorization - -[discrete#es-connectors-dropbox-client-create-dropbox-oauth-app] -====== Create Dropbox OAuth App - -You'll need to create an OAuth app in the Dropbox platform by following these steps: - -1. Register a new app in the https://www.dropbox.com/developers/apps[Dropbox App Console^]. -Select *Full Dropbox API app* and choose the following required permissions: -* `files.content.read` -* `sharing.read` -+ -To use document level security, you'll also need the following permissions: -* `team_info.read` -* `team_data.member` -* `team_data.content.read` -* `members.read` -2. Once the app is created, make note of the *app key* and *app secret* values which you'll need to configure the Dropbox connector on your Elastic deployment. 
- -[discrete#es-connectors-dropbox-client-refresh-token] -====== Generate a refresh Token - -To generate a refresh token, follow these steps: - -1. Go to the following URL, replacing `` with the *app key* value saved earlier: -`https://www.dropbox.com/oauth2/authorize?client_id=&response_type=code&token_access_type=offline` -+ -The HTTP response should contain an *authorization code* that you'll use to generate a refresh token. -An authorization code *can only be used once* to create a refresh token. -+ -2. In your terminal, run the following `cURL` command, replacing ``, `:` with the values you saved earlier: -+ -[source,shell] ----- -curl -X POST "https://api.dropboxapi.com/oauth2/token?code=&grant_type=authorization_code" -u ":" ----- -// NOTCONSOLE -Store the refresh token from the response to be used in the connector configuration. -+ -Make sure the response has a list of the following scopes: -+ -* `account_info.read` -* `files.content.read` -* `files.metadata.read` -* `sharing.read` -* `team_info.read` (if using document level security) -* `team_data.member` (if using document level security) -* `team_data.content.read` (if using document level security) -* `members.read` (if using document level security) - -[discrete#es-connectors-dropbox-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/confluence.py[connector source code^]. -These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required to set up the connector: - -`path`:: -The folder path to fetch files/folders from Dropbox. Default value is `/`. - -`app_key` (required):: -The App Key to authenticate your Dropbox application. - -`app_secret` (required):: -The App Secret to authenticate your Dropbox application. - -`refresh_token` (required):: -The refresh token to authenticate your Dropbox application. - -use_document_level_security:: -Toggle to enable <>. -When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -Access control syncs will fetch users' access control lists and store them in a separate index. - -`retry_count`:: -The number of retry attempts after a failed request to Dropbox. Default value is `3`. - -`concurrent_downloads`:: -The number of concurrent downloads for fetching attachment content. -This can help speed up content extraction of attachments. Defaults to `100`. - -`use_text_extraction_service`:: -Requires a separate deployment of the <>. -Requires that pipeline settings disable text extraction. -Default value is `False`. - -`use_document_level_security`:: -Toggle to enable <>. -When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. Access control syncs will fetch users' access control lists and store them in a separate index. - -`include_inherited_users_and_groups`:: -Depends on document level security being enabled. -Include groups and inherited users when indexing permissions. - -[WARNING] -==== -Enabling `Include groups and inherited users` will cause a signficant performance degradation. 
-==== - -[discrete#es-connectors-dropbox-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-dropbox-client-documents-and-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Files* -** Includes metadata such as file name, path, size, content, etc. -* *Folders* - -[NOTE] -==== -Due to a Dropbox issue, metadata updates to Paper files from Dropbox Paper are not immediately reflected in the Dropbox UI. -This delays the availability of updated results for the connector. -Once the metadata changes are visible in the Dropbox UI, the updates are available. -==== - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. -* Currently, the connector doesn't retrieve files from shared Team folders. -* Permissions are not synced by default. If <> is not enabled *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-dropbox-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-dropbox-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-dropbox-client-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -The following section describes <> for this connector. - -Advanced sync rules for Dropbox allow you to sync Dropbox files based on a query that matches strings in the filename. -You can optionally filter the results of the query by `file_extensions` or `file_categories`. -When both are provided, priority is given to `file_categories`. -We have some examples below for illustration. - -[discrete#es-connectors-dropbox-client-sync-rules-advanced-example-1] -*Example: Query only* - -[source,js] ----- -[ - { - "query": "confidential" - }, - { - "query": "dropbox" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-dropbox-client-sync-rules-advanced-example-2] -*Example: Query with file extension filter* - -[source,js] ----- -[ - { - "query": "dropbox", - "options": { - "file_extensions": [ - "txt", - "pdf" - ] - } - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-dropbox-client-sync-rules-advanced-example-3] -*Example: Query with file category filter* - -[source,js] ----- -[ - { - "query": "test", - "options": { - "file_categories": [ - { - ".tag": "paper" - }, - { - ".tag": "png" - } - ] - } - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-dropbox-client-sync-rules-advanced-limitations] -*Limitations* - -* Content extraction is not supported for Dropbox *Paper* files when advanced sync rules are enabled. - -[discrete#es-connectors-dropbox-client-end-to-end-testing] -===== End-to-end Testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the Dropbox connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=dropbox ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=dropbox DATA_SIZE=small ----- - -[discrete#es-connectors-dropbox-client-known-issues] -===== Known issues - -Refer to <> for a list of known issues for all connectors. 
- -[discrete#es-connectors-dropbox-client-troubleshooting] -===== Troubleshooting - -See <> for a list of troubleshooting tips for all connectors. - -[discrete#es-connectors-dropbox-client-security] -===== Security - -See <> for a list of security tips for all connectors. - -[discrete#es-connectors-dropbox-client-content-extraction] -===== Content extraction - -See <>. - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-filter-extract-transform.asciidoc b/docs/reference/connector/docs/connectors-filter-extract-transform.asciidoc deleted file mode 100644 index 62a99928bfb46..0000000000000 --- a/docs/reference/connector/docs/connectors-filter-extract-transform.asciidoc +++ /dev/null @@ -1,53 +0,0 @@ -[#es-connectors-filter-extract-transform] -== Extract, filter, and transform content -++++ -Extract and transform -++++ - -Elastic connectors offer a number of tools for extracting, filtering, and transforming content from your third-party data sources. -Each connector has its own default logic, specific to the data source, and every Elastic Search deployment uses a default ingest pipeline to extract and transform data. -Several tools are also available for more advanced use cases. - -The following diagram provides an overview of how content extraction, sync rules, and ingest pipelines can be orchestrated in your connector's data pipeline. - -[.screenshot] -image::images/pipelines-extraction-sync-rules.png[Architecture diagram of data pipeline with content extraction, sync rules, and ingest pipelines] - -By default, only the connector specific logic (2) and the default `search-default-ingestion` pipeline (6) extract and transform your data, as configured in your deployment. - -The following tools are available for more advanced use cases: - -* *Advanced sync rules* (1). Remote filtering at the data source level, before data reaches the connector. -* *Basic sync rules* (4) or *extraction service* (3). Integration filtering controlled by the connector. -* *Ingest pipelines* (6). Customized pipeline filtering where {es} filters data _before_ indexing. - -Learn more in the following documentation links. - -[discrete#es-connectors-filter-extract-transform-content-extraction] -=== Content extraction - -Connectors have a default content extraction service, plus the <> for advanced use cases. - -Refer to <> for details. - -[discrete#es-connectors-filter-extract-transform-sync-rules] -=== Sync rules - -Use sync rules to help control which documents are synced between the third-party data source and Elasticsearch. -Sync rules enable you to filter data early in the data pipeline, which is more efficient and secure. - -* *Basic* sync rules are identical for all connectors. -* *Advanced sync rules* are data source-specific. -They cover complex query-and-filter scenarios, defined in a DSL JSON snippet. - -Refer to <> for details. - -[discrete#es-connectors-filter-extract-transform-ingest-pipelines] -=== Ingest pipelines - -Ingest pipelines are a user-defined sequence of processors that modify documents before they are indexed into Elasticsearch. -Use ingest pipelines for data enrichment, normalization, and more. - -Elastic connectors use a default ingest pipeline, which you can copy and customize to meet your needs. - -Refer to {ref}/ingest-pipeline-search.html[ingest pipelines in Search] in the {es} documentation. 
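-
-As a rough illustration of the kind of customization that is possible (this is not the default `search-default-ingestion` pipeline, and the pipeline name and field names below are hypothetical), a small custom pipeline could add a timestamp and normalize a field before indexing:
-
-[source,console]
-----
-PUT _ingest/pipeline/my-connector-pipeline
-{
-  "description": "Example custom pipeline for connector documents",
-  "processors": [
-    {
-      "set": {
-        "field": "ingested_at",
-        "value": "{{{_ingest.timestamp}}}"
-      }
-    },
-    {
-      "lowercase": {
-        "field": "title",
-        "ignore_missing": true
-      }
-    }
-  ]
-}
-----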
diff --git a/docs/reference/connector/docs/connectors-framework.asciidoc b/docs/reference/connector/docs/connectors-framework.asciidoc deleted file mode 100644 index b0a037d9ef468..0000000000000 --- a/docs/reference/connector/docs/connectors-framework.asciidoc +++ /dev/null @@ -1,27 +0,0 @@ -[#es-connectors-framework] -== Elastic connector framework: build and customize connectors -++++ -Build and customize connectors -++++ - -The Elastic connector framework enables developers to build Elastic-supported self-managed connectors which sync third-party data sources to Elasticsearch. -The framework implements common functionalities out of the box, so developers can focus on the logic specific to integrating their chosen data source. - -The framework ensures compatibility, makes it easier for our team to review PRs, and help out in the development process. -When you build using our framework, we provide a pathway for the connector to be officially supported by Elastic. - -[discrete#es-connectors-framework-use-cases] -=== Use cases - -The framework serves two distinct, but related use cases: - -* Customizing an existing Elastic <> -* Building a new self-managed connector - -[discrete#es-connectors-framework-learn-more] -=== Learn more - -To learn how to contribute connectors using the framework, refer to our https://github.com/elastic/connectors/blob/main/docs/CONTRIBUTING.md[contributing guide] in the `connectors` repository. -This guide explains how to get started and includes a contribution checklist and pull request guidelines. - -This repo contains all the source code for existing Elastic connectors. \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-github.asciidoc b/docs/reference/connector/docs/connectors-github.asciidoc deleted file mode 100644 index df577d83e8121..0000000000000 --- a/docs/reference/connector/docs/connectors-github.asciidoc +++ /dev/null @@ -1,697 +0,0 @@ -[#es-connectors-github] -=== Elastic GitHub connector reference -++++ -GitHub -++++ -// Attributes used in this file -:service-name: GitHub -:service-name-stub: github - -The _Elastic GitHub connector_ is a <> for https://www.github.com[GitHub^]. -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. -******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-github-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-github-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* as of Elastic version *8.11.0*. - -To use this connector natively in Elastic Cloud, satisfy all <>. 
- -[discrete#es-connectors-github-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-github-usage] -===== Usage - -To use this connector as a *managed connector*, see <>. - -For additional operations, see <>. - -[discrete#es-connectors-github-personal-access-token] -====== GitHub personal access token - -Configure a GitHub personal access token to fetch data from GitHub. - -Follow these steps to generate a GitHub personal access token: - -* Go to *GitHub Settings → Developer settings → Personal access tokens → Tokens(classic)*. -* Select `Generate new token`. -* Add a note and select the following scopes: -** `repo` -** `user` -** `read:org` -* Select `Generate token` and copy the token. - -[discrete#es-connectors-github-github-app] -====== GitHub App - -Configure a GitHub App to fetch data from GitHub. - -Follow these steps to create a GitHub App: - -* Go to *GitHub Settings → Developer settings → GitHub Apps*. -* Select `New GitHub App`. -* Add a name and Homepage URL, deselect `Active` under `Webhook`. -* Under `Permissions`, select `Read-only` for `Commit statuses`, `Contents`, `Issues`, `Metadata` and `Pull requests` under `Repository permissions`, select `Read-only` for `Members` under `Organization permissions`. -* Select `Any account` for `Where can this GitHub App be installed?`. -* Click `Create GitHub App`. -* Scroll down to the section `Private keys`, and click `Generate a private key`. -* Click `Install App` in the upper-left corner, select the organizations/personal accounts you want to install the GitHub App on, click `Install`. -* You can choose to install it on all repositories or selected repositories, and click `Install`. - -[discrete#es-connectors-github-compatability] -===== Compatibility - -Both GitHub and GitHub Enterprise are supported. - -[discrete#es-connectors-github-configuration] -===== Configuration - -The following configuration fields are required: - -Data source:: -Toggle between GitHub Cloud or GitHub Server. - -Server URL:: -URL of the GitHub Server instance. (GitHub Server only) - -Authentication method:: -The method to authenticate the GitHub instance. Toggle between `Personal access token` and `GitHub App`. - -Token:: -GitHub personal access token to authenticate the GitHub instance. This field is only available for `Personal access token` authentication method. - -Repository Type:: -Toggle between `Organization` and `Other`. -Note that document level security (DLS) is only available for `Organization` repositories. - -Organization Name:: -Name of the organization to fetch data from. This field is only available when `Authentication method` is set to `Personal access token` and `Repository Type` is set to `Organization`. - -App ID:: -App ID of the GitHub App. This field is only available when `Authentication method` is set to `GitHub App`. - -App private key:: -Private key generated for the GitHub App. This field is only available when `Authentication method` is set to `GitHub App`. - -List of repositories:: -Comma-separated list of repositories to fetch data from GitHub instance. If the value is `*` the connector will fetch data from all repositories present in the configured user's account. -+ -Default value is `*`. -+ -Examples: -+ -* `elasticsearch`,`elastic/kibana` -* `*` -[TIP] -==== -*Repository ownership* - -If the "OWNER/" portion of the "OWNER/REPO" repository argument is omitted, it defaults to the name of the authenticating user. 
- -In the examples provided here: - -* the `elasticsearch` repo synced will be the `/elasticsearch` repo -* the `kibana` repo synced will be the Elastic owned repo - -The "OWNER/" portion of the "OWNER/REPO" repository argument must be provided when `GitHub App` is selected as the `Authentication method`. -==== -[NOTE] -==== -This field can be bypassed by advanced sync rules. -==== - -Enable SSL:: -Enable SSL for the GitHub instance. - -SSL certificate:: -SSL certificate for the GitHub instance. Example: -+ -[source, txt] ----- ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -... -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- ----- - -Enable document level security:: -Toggle to enable <>. -When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -DLS is only available when `Repository Type` is set to `Organization`. - -[discrete#es-connectors-github-documents-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* **Repositories** -* **Pull Requests** -* **Issues** -* **Files & Folder** - -Only the following file extensions are ingested: - -* `.markdown` -* `.md` -* `.rst` - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permissions are not synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elasticsearch Index. -==== - -[discrete#es-connectors-github-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-github-sync-rules] -===== Sync rules - -_Basic_ sync rules are identical for all connectors and are available by default. -For more information read <>. - -[discrete#es-connectors-github-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -The following section describes *advanced sync rules* for this connector. -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -The following sections provide examples of advanced sync rules for this connector. - -[discrete#es-connectors-github-sync-rules-advanced-branch] -*Indexing document and files based on branch name configured via branch key* - -[source,js] ----- -[ - { - "repository": "repo_name", - "filter": { - "branch": "sync-rules-feature" - } - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-github-sync-rules-advanced-issue-key] -*Indexing document based on issue query related to bugs via issue key* - -[source,js] ----- -[ - { - "repository": "repo_name", - "filter": { - "issue": "is:bug" - } - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-github-sync-rules-advanced-pr-key] -*Indexing document based on PR query related to open PR's via PR key* - -[source,js] ----- -[ - { - "repository": "repo_name", - "filter": { - "pr": "is:open" - } - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-github-sync-rules-advanced-issue-query-branch-name] -*Indexing document and files based on queries and branch name* - -[source,js] ----- -[ - { - "repository": "repo_name", - "filter": { - "issue": "is:bug", - "pr": "is:open", - "branch": "sync-rules-feature" - } - } -] ----- -// NOTCONSOLE - -[NOTE] -==== -All documents pulled by a given rule are indexed regardless of whether the document has already been indexed by a previous rule. 
-This can lead to document duplication, but the indexed documents count will differ in the logs. -Check the Elasticsearch index for the actual document count. -==== - -[discrete#es-connectors-github-sync-rules-advanced-overlapping] -*Advanced rules for overlapping* - -[source,js] ----- -[ - { - "filter": { - "pr": "is:pr is:merged label:auto-backport merged:>=2023-07-20" - }, - "repository": "repo_name" - }, - { - "filter": { - "pr": "is:pr is:merged label:auto-backport merged:>=2023-07-15" - }, - "repository": "repo_name" - } -] ----- -// NOTCONSOLE - -[NOTE] -==== -If `GitHub App` is selected as the authentication method, the "OWNER/" portion of the "OWNER/REPO" repository argument must be provided. -==== - -[discrete#es-connectors-github-content-extraction] -===== Content Extraction - -See <>. - -[discrete#es-connectors-github-known-issues] -===== Known issues - -There are currently no known issues for this connector. -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-github-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-github-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-github-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-github-client-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. - -This self-managed connector is compatible with Elastic versions *8.10.0+*. - -To use this connector, satisfy all <>. - -[discrete#es-connectors-github-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-github-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, see <> -For additional usage operations, see <>. - -[discrete#es-connectors-github-client-personal-access-token] -====== GitHub personal access token - -Configure a GitHub personal access token to fetch data from GitHub. - -Follow these steps to generate a GitHub access token: - -* Go to *GitHub Settings → Developer settings → Personal access tokens → Tokens(classic)*. -* Select `Generate new token`. -* Add a note and select the following scopes: -** `repo` -** `user` -** `read:org` -* Select `Generate token` and copy the token. - -[discrete#es-connectors-github-client-github-app] -====== GitHub App - -Configure a GitHub App to fetch data from GitHub. - -Follow these steps to create a GitHub App: - -* Go to *GitHub Settings → Developer settings → GitHub Apps*. -* Select `New GitHub App`. -* Add a name and Homepage URL, deselect `Active` under `Webhook`. -* Under `Permissions`, select `Read-only` for `Commit statuses`, `Contents`, `Issues`, `Metadata` and `Pull requests` under `Repository permissions`, select `Read-only` for `Members` under `Organization permissions`. -* Select `Any account` for `Where can this GitHub App be installed?`. -* Click `Create GitHub App`. -* Scroll down to the section `Private keys`, and click `Generate a private key`. -* Click `Install App` in the upper-left corner, select the organizations/personal accounts you want to install the GitHub App on, click `Install`. 
-* You can choose to install it on all repositories or selected repositories, and click `Install`. - - -[discrete#es-connectors-github-client-compatability] -===== Compatibility - -Both GitHub and GitHub Enterprise are supported. - -[discrete#es-connectors-github-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/github.py[connector source code^]. -These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required: - -`data_source`:: -GitHub Cloud or GitHub Server. - -`host`:: -URL of the GitHub Server instance. (GitHub Server only) - -`auth_method`:: -The method to authenticate the GitHub instance. Toggle between `Personal access token` and `GitHub App`. - -`token`:: -GitHub personal access token to authenticate the GitHub instance. This field is only available for `Personal access token` authentication method. - -`repo_type`:: -Toggle between `Organization` and `Other`. -Note that document level security (DLS) is only available for `Organization` repositories. - -`org_name`:: -Name of the organization to fetch data from. This field is only available when `Authentication method` is set to `Personal access token` and `Repository Type` is set to `Organization`. - -`app_id`:: -App ID of the GitHub App. This field is only available when `Authentication method` is set to `GitHub App`. - -`private_key`:: -Private key generated for the GitHub App. This field is only available when `Authentication method` is set to `GitHub App`. - -`repositories`:: -Comma-separated list of repositories to fetch data from GitHub instance. If the value is `*` the connector will fetch data from all repositories present in the configured user's account. -+ -Default value is `*`. -+ -Examples: -+ -* `elasticsearch`,`elastic/kibana` -* `*` -[TIP] -==== -*Repository ownership* - -If the "OWNER/" portion of the "OWNER/REPO" repository argument is omitted, it defaults to the name of the authenticating user. - -In the examples provided here: - -* the `elasticsearch` repo synced will be the `/elasticsearch` -* the `kibana` repo synced will be the Elastic owned repo - -The "OWNER/" portion of the "OWNER/REPO" repository argument must be provided when `GitHub App` is selected as the `Authentication method`. -==== -[NOTE] -==== -This field can be bypassed by advanced sync rules. -==== - -`ssl_enabled`:: -Whether SSL verification will be enabled. Default value is `False`. - -`ssl_ca`:: -Content of SSL certificate. Note: If `ssl_enabled` is `False`, the value in this field is ignored. Example certificate: -+ -[source, txt] ----- ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -... -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- ----- - -`use_document_level_security`:: -Toggle to enable <>. -When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -DLS is only available when `Repository Type` is set to `Organization`. - -`retry_count`:: -The number of retry attempts after failed request to GitHub. Default value is `3`. - -`use_text_extraction_service`:: -Requires a separate deployment of the <>. Requires that pipeline settings disable text extraction. 
-Default value is `False`. - -[discrete#es-connectors-github-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-github-client-documents-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* **Repositories** -* **Pull Requests** -* **Issues** -* **Files & Folder** - -Only the following file extensions are ingested: - -* `.markdown` -* `.md` -* `.rst` - -[NOTE] -==== -* Content of files bigger than 10 MB won't be extracted. -* Permissions are not synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elasticsearch Index. -==== - -[discrete#es-connectors-github-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-github-client-sync-rules] -===== Sync rules - -_Basic_ sync rules are identical for all connectors and are available by default. -For more information read <>. - -[discrete#es-connectors-github-client-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -The following section describes *advanced sync rules* for this connector. -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -The following sections provide examples of advanced sync rules for this connector. - -[discrete#es-connectors-github-client-sync-rules-advanced-branch] -*Indexing document and files based on branch name configured via branch key* - -[source,js] ----- -[ - { - "repository": "repo_name", - "filter": { - "branch": "sync-rules-feature" - } - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-github-client-sync-rules-advanced-issue-key] -*Indexing document based on issue query related to bugs via issue key* - -[source,js] ----- -[ - { - "repository": "repo_name", - "filter": { - "issue": "is:bug" - } - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-github-client-sync-rules-advanced-pr-key] -*Indexing document based on PR query related to open PR's via PR key* - -[source,js] ----- -[ - { - "repository": "repo_name", - "filter": { - "pr": "is:open" - } - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-github-client-sync-rules-advanced-issue-query-branch-name] -*Indexing document and files based on queries and branch name* - -[source,js] ----- -[ - { - "repository": "repo_name", - "filter": { - "issue": "is:bug", - "pr": "is:open", - "branch": "sync-rules-feature" - } - } -] ----- -// NOTCONSOLE - -[NOTE] -==== -All documents pulled by a given rule are indexed regardless of whether the document has already been indexed by a previous rule. -This can lead to document duplication, but the indexed documents count will differ in the logs. -Check the Elasticsearch index for the actual document count. -==== - -[discrete#es-connectors-github-client-sync-rules-advanced-overlapping] -*Advanced rules for overlapping* - -[source,js] ----- -[ - { - "filter": { - "pr": "is:pr is:merged label:auto-backport merged:>=2023-07-20" - }, - "repository": "repo_name" - }, - { - "filter": { - "pr": "is:pr is:merged label:auto-backport merged:>=2023-07-15" - }, - "repository": "repo_name" - } -] ----- -// NOTCONSOLE - -[NOTE] -==== -If `GitHub App` is selected as the authentication method, the "OWNER/" portion of the "OWNER/REPO" repository argument must be provided. 
-==== - -[discrete#es-connectors-github-client-content-extraction] -===== Content Extraction - -See <>. - -[discrete#es-connectors-github-client-connector-client-operations] -===== Self-managed connector operations - -[discrete#es-connectors-github-client-testing] -===== End-to-end testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the GitHub connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=github ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=github DATA_SIZE=small ----- - -[discrete#es-connectors-github-client-known-issues] -===== Known issues - -There are currently no known issues for this connector. -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-github-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-github-client-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-gmail.asciidoc b/docs/reference/connector/docs/connectors-gmail.asciidoc deleted file mode 100644 index 594df7b9e681a..0000000000000 --- a/docs/reference/connector/docs/connectors-gmail.asciidoc +++ /dev/null @@ -1,366 +0,0 @@ -[#es-connectors-gmail] -=== Elastic Gmail connector reference -++++ -Gmail -++++ -// Attributes used in this file -:service-name: Gmail -:service-name-stub: gmail - -The _Elastic GMail connector_ is a <> for GMail. - - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-gmail-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-gmail-availability] -===== Availability and prerequisites - -This connector is available as a *managed connector* (managed service) in Elastic Cloud. - -This connector is compatible with Elastic versions *8.13.0+*. - -To use this connector, satisfy all <>. - -[discrete#es-connectors-gmail-create-native-connector] -==== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-gmail-usage] -===== Usage - -To use this connector as a managed connector in Elastic Cloud, use the *Connectors* workflow in the Kibana UI. - -To create a new {service-name} connector: - -. Navigate to *Search -> Connectors* page in the Kibana UI. -. Select the *New Native Connector* button. -. Select the *{service-name}* connector. - -For additional operations, see <>. - -[discrete#es-connectors-gmail-connector-authentication-prerequisites] -===== Connector authentication prerequisites - -Before syncing any data from GMail, you need to create a https://cloud.google.com/iam/docs/service-account-overview[service account^] with appropriate access to the GMail and the Google Directory API, which is part of the Google Admin SDK API. -You also need to enable domain-wide delegation to impersonate the users you're fetching messages from. - -To get started, log into https://cloud.google.com[Google Cloud Platform^] and go to the `Console`. - -. *Create a Google Cloud Project.* Give your project a name, change the project ID and click the Create button. -. 
*Enable Google APIs.* Choose APIs & Services from the left menu and click on `Enable APIs and Services`. You need to enable *GMail API* and the *Google Admin SDK API*. -. *Create a Service Account.* In the `APIs & Services` section, click on `Credentials` and click on `Create credentials` to create a service account. Give your service account a name and a service account ID. This is like an email address and will be used to identify your service account in the future. Click `Done` to finish creating the service account. -+ -Your service account needs to have access to at least the following scope: -+ -* `https://www.googleapis.com/auth/gmail.readonly` -. *Create a Key File*. - * In the Cloud Console, go to `IAM and Admin` > `Service accounts` page. - * Click the email address of the service account that you want to create a key for. - * Click the `Keys` tab. Click the `Add key` drop-down menu, then select `Create new key`. - * Select JSON as the Key type and then click `Create`. This will download a JSON file that will contain the service account credentials. - -. *Google Workspace domain-wide delegation of authority*. -+ -To access user data like messages on a Google Workspace domain, the service account that you created needs to be granted access by a super administrator for the domain. You can follow https://developers.google.com/cloud-search/docs/guides/delegation[the official documentation^] to perform Google Workspace domain-wide delegation of authority. -+ -You need to grant the following *OAuth Scopes* to your service account: -+ --- -* `https://www.googleapis.com/auth/admin.directory.user.readonly` --- -+ -This step allows the connector to access user data and their group memberships in your Google Workspace organization. - -[discrete#es-connectors-gmail-configuration] -===== Configuration - -The following configuration fields are required: - -GMail service account JSON:: -The service account credentials generated from Google Cloud Platform (JSON string). -Refer to the https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account[Google Cloud documentation^] for more information. - -Google Workspace admin email:: -Google Workspace admin email. -Required to enable document level security (DLS). -A service account with delegated authority can impersonate an admin user with permissions to access Google Workspace user data and their group memberships. -Refer to the https://support.google.com/a/answer/162106?hl=en[Google Cloud documentation^] for more information. - -Google customer ID:: -Google customer id. -Required to fetch messages and to enable document level security (DLS). -Go to `Google Workspace Admin Console` -> `Account` and copy the value under `Customer Id`. - -Include spam and trash emails:: -Toggle to fetch spam and trash emails. -Also works with document level security (DLS). - -Enable document level security:: -Toggle to enable <>. -DLS is supported for the GMail connector. -When enabled: -+ -* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -* Access control syncs will fetch users' access control lists and store them in a separate index. - -[discrete#es-connectors-gmail-documents-and-syncs] -===== Documents and syncs - -The connector will fetch all messages of all users the service account has access to. - -[discrete#es-connectors-gmail-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. 
- -[discrete#es-connectors-gmail-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are available for this connector. -The connector supports the https://support.google.com/mail/answer/7190[GMail advanced search syntax] under the `messages` field. - -For example: - -[source,js] ----- -{ - "messages": [ - "before:2021/10/10", - "from:amy" - ] -} ----- -// NOTCONSOLE - -[discrete#es-connectors-gmail-document-level-security] -===== Document level security - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. -Refer to <> on this page for how to enable DLS for this connector. - -[NOTE] -==== -Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. -The example uses SharePoint Online as the data source, but the same steps apply to every connector. -==== - -[discrete#es-connectors-gmail-known-issues] -===== Known issues - -There are currently no known issues for this connector. - -[discrete#es-connectors-gmail-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-gmail-security] -===== Security - -See <>. - -[discrete#es-connectors-gmail-framework-and-source] -===== Framework and source - -This connector is built in Python with the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/gmail.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-gmail-connector-client-reference] -==== *Self-managed connector reference* - -.View *self-managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-gmail-client-availability] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector* from the *Elastic connector framework*. - -This self-managed connector is compatible with Elastic versions *8.10.0+*. - -To use this connector, satisfy all <>. - -[discrete#es-connectors-gmail-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-gmail-client-usage] -===== Usage - -To use this connector as a **self-managed connector**, use the *Connector* workflow in the Kibana UI. - -For additional operations, see <>. - -[discrete#es-connectors-gmail-client-connector-authentication-prerequisites] -===== Connector authentication prerequisites - -Before syncing any data from GMail, you need to create a https://cloud.google.com/iam/docs/service-account-overview[service account^] with appropriate access to the GMail and the Google Directory API, which is part of the Google Admin SDK API. -You also need to enable domain-wide delegation to impersonate the users you're fetching messages from. - -To get started, log into https://cloud.google.com[Google Cloud Platform^] and go to the `Console`. - -. *Create a Google Cloud Project.* Give your project a name, change the project ID and click the Create button. -. *Enable Google APIs.* Choose APIs & Services from the left menu and click on `Enable APIs and Services`. You need to enable *GMail API* and the *Google Admin SDK API*. -. 
*Create a Service Account.* In the `APIs & Services` section, click on `Credentials` and click on `Create credentials` to create a service account. Give your service account a name and a service account ID. This is like an email address and will be used to identify your service account in the future. Click `Done` to finish creating the service account. -+ -Your service account needs to have access to at least the following scope: -+ -* `https://www.googleapis.com/auth/gmail.readonly` -. *Create a Key File*. - * In the Cloud Console, go to `IAM and Admin` > `Service accounts` page. - * Click the email address of the service account that you want to create a key for. - * Click the `Keys` tab. Click the `Add key` drop-down menu, then select `Create new key`. - * Select JSON as the Key type and then click `Create`. This will download a JSON file that will contain the service account credentials. - -. *Google Workspace domain-wide delegation of authority*. -+ -To access user data like messages on a Google Workspace domain, the service account that you created needs to be granted access by a super administrator for the domain. You can follow https://developers.google.com/cloud-search/docs/guides/delegation[the official documentation^] to perform Google Workspace domain-wide delegation of authority. -+ -You need to grant the following *OAuth Scopes* to your service account: -+ --- -* `https://www.googleapis.com/auth/admin.directory.user.readonly` --- -+ -This step allows the connector to access user data and their group memberships in your Google Workspace organization. - -[discrete#es-connectors-gmail-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/gmail.py[connector source code^]. -These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required: - -`GMail service account JSON`:: -The service account credentials generated from Google Cloud Platform (JSON string). -Refer to the https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account[Google Cloud documentation^] for more information. - -`Google Workspace admin email`:: -Google Workspace admin email. -Required to enable document level security (DLS). -A service account with delegated authority can impersonate an admin user with permissions to access Google Workspace user data and their group memberships. -Refer to the https://support.google.com/a/answer/162106?hl=en[Google Cloud documentation^] for more information. - -`Google customer id`:: -Google customer id. -Required to fetch messages and to enable document level security (DLS). -Go to `Google Workspace Admin Console` -> `Account` and copy the value under `Customer Id`. - -`Include spam and trash emails`:: -Toggle to fetch spam and trash emails. -Also works with DLS. - -`Enable document level security`:: -Toggle to enable <>. -DLS is supported for the GMail connector. -When enabled: -+ -* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -* Access control syncs will fetch users' access control lists and store them in a separate index. 
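-
-For orientation, the `GMail service account JSON` value is the key file downloaded from Google Cloud, supplied as a single JSON string. A key file generally has roughly the following shape (abridged; every value below is a placeholder, not a real credential):
-
-[source,js]
-----
-{
-  "type": "service_account",
-  "project_id": "my-gcp-project",
-  "private_key_id": "0123456789abcdef",
-  "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
-  "client_email": "gmail-connector@my-gcp-project.iam.gserviceaccount.com",
-  "client_id": "123456789012345678901",
-  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
-  "token_uri": "https://oauth2.googleapis.com/token"
-}
-----
-// NOTCONSOLE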
- -[discrete#es-connectors-gmail-client-deployment-using-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-gmail-client-documents-and-syncs] -===== Documents and syncs - -The connector will fetch all messages of all users the service account has access to. - -[discrete#es-connectors-gmail-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-gmail-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are available for this connector. -The connector supports the https://support.google.com/mail/answer/7190[GMail advanced search syntax] under the `messages` field. - -For example: - -[source,js] ----- -{ - "messages": [ - "before:2021/10/10", - "from:amy" - ] -} ----- -// NOTCONSOLE - -[discrete#es-connectors-gmail-client-document-level-security] -===== Document level security - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. -Refer to <> on this page for how to enable DLS for this connector. - -[NOTE] -==== -Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. -The example uses SharePoint Online as the data source, but the same steps apply to every connector. -==== - -[discrete#es-connectors-gmail-client-known-issues] -===== Known issues - -There are currently no known issues for this connector. - -[discrete#es-connectors-gmail-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-gmail-client-security] -===== Security - -See <>. - -[discrete#es-connectors-gmail-client-framework-and-source] -===== Framework and source - -This connector is built in Python with the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/gmail.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-google-cloud.asciidoc b/docs/reference/connector/docs/connectors-google-cloud.asciidoc deleted file mode 100644 index 64fcb82b19ab7..0000000000000 --- a/docs/reference/connector/docs/connectors-google-cloud.asciidoc +++ /dev/null @@ -1,266 +0,0 @@ -[#es-connectors-google-cloud] -=== Google Cloud Storage Connector -++++ -Google Cloud Storage -++++ - -// Attributes used in this file -:service-name: Google Cloud Storage -:service-name-stub: google_cloud_storage - -The _Elastic Google Cloud Storage connector_ is a <> for https://cloud.google.com/storage[Google Cloud Storage^] data sources. - - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-google-cloud-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-google-cloud-availability-prerequisites] -===== Availability and prerequisites - -This connector is available natively in Elastic Cloud since *8.12.0.* -To use this connector in Elastic Cloud, satisfy all <>. 
- -[discrete#es-connectors-google-cloud-usage] -===== Usage - -The Google Cloud Storage service account must have (at least) the following scopes and roles: - -* `resourcemanager.projects.get` -* `serviceusage.services.use` -* `storage.buckets.list` -* `storage.objects.list` -* `storage.objects.get` - -Google Cloud Storage service account credentials are stored in a JSON file. - -[discrete#es-connectors-google-cloud-configuration] -===== Configuration - -The following configuration field is required to set up the connector: - -Buckets:: -List of buckets to index. -`*` will index all buckets. - -Google Cloud service account JSON:: -The service account credentials generated from Google Cloud Storage (JSON string). -Refer to the https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account[Google Cloud documentation^] for more information. - -[discrete#es-connectors-google-cloud-documents-syncs] -===== Documents and syncs - -The connector will fetch all buckets and paths the service account has access to. - -The `Owner` field is not fetched as `read_only` scope doesn’t allow the connector to fetch IAM information. - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permission are not synced. All documents indexed to an Elastic deployment will be visible to all users with access to that Elastic Deployment. -==== - -[discrete#es-connectors-google-cloud-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-google-cloud-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are not available for this connector in the present version. -Currently filtering is controlled by ingest pipelines. - -[discrete#es-connectors-google-cloud-content-extraction] -===== Content extraction - -See <>. - -[source,shell] ----- -$ make ftest NAME=google_cloud_storage ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=google_cloud_storage DATA_SIZE=small ----- - -[discrete#es-connectors-google-cloud-known-issues] -===== Known issues - -There are currently no known issues for this connector. - -[discrete#es-connectors-google-cloud-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-google-cloud-security] -===== Security - -See <>. - -[discrete#es-connectors-google-cloud-source] -===== Framework and source - -This connector is built with the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/google_cloud_storage.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-google-cloud-connector-client-reference] -==== *Self-managed connector reference* - -.View *self-managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-google-cloud-client-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. -This self-managed connector is compatible with Elastic versions *8.6.0+*. 
-To use this connector, satisfy all <>. - -[discrete#es-connectors-google-cloud-client-usage] -===== Usage - -The Google Cloud Storage service account must have (at least) the following scopes and roles: - -* `resourcemanager.projects.get` -* `serviceusage.services.use` -* `storage.buckets.list` -* `storage.objects.list` -* `storage.objects.get` - -Google Cloud Storage service account credentials are stored in a JSON file. - -[discrete#es-connectors-google-cloud-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/google_cloud_storage.py[connector source code^]. -These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required to set up the connector: - -`buckets`:: -List of buckets to index. -`*` will index all buckets. - -`service_account_credentials`:: -The service account credentials generated from Google Cloud Storage (JSON string). -Refer to the https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account[Google Cloud documentation^] for more information. - -`retry_count`:: -The number of retry attempts after a failed call to Google Cloud Storage. -Default value is `3`. - -[discrete#es-connectors-google-cloud-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-google-cloud-client-documents-syncs] -===== Documents and syncs - -The connector will fetch all buckets and paths the service account has access to. - -The `Owner` field is not fetched as `read_only` scope doesn’t allow the connector to fetch IAM information. - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. -* Permission are not synced. All documents indexed to an Elastic deployment will be visible to all users with access to that Elastic Deployment. -==== - -[discrete#es-connectors-google-cloud-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-google-cloud-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are not available for this connector in the present version. -Currently filtering is controlled by ingest pipelines. - -[discrete#es-connectors-google-cloud-client-content-extraction] -===== Content extraction - -See <>. - -[discrete#es-connectors-google-cloud-client-client-operations-testing] -===== End-to-end testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the Google Cloud Storage connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=google_cloud_storage ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=google_cloud_storage DATA_SIZE=small ----- - -[discrete#es-connectors-google-cloud-client-known-issues] -===== Known issues - -There are currently no known issues for this connector. - -[discrete#es-connectors-google-cloud-client-troubleshooting] -===== Troubleshooting - -See <>. 
- -[discrete#es-connectors-google-cloud-client-security] -===== Security - -See <>. - -[discrete#es-connectors-google-cloud-client-source] -===== Framework and source - -This connector is built with the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/google_cloud_storage.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-google-drive.asciidoc b/docs/reference/connector/docs/connectors-google-drive.asciidoc deleted file mode 100644 index d3c4a0886efc3..0000000000000 --- a/docs/reference/connector/docs/connectors-google-drive.asciidoc +++ /dev/null @@ -1,409 +0,0 @@ -[#es-connectors-google-drive] -=== Elastic Google Drive connector reference -++++ -Google Drive -++++ -// Attributes used in this file -:service-name: Google Drive -:service-name-stub: google_drive - -The _Elastic Google Drive connector_ is a <> for https://www.google.com/drive[Google Drive^]. -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. -******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-google-drive-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-google-drive-availability-and-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* as of Elastic version *8.11.0*. - -To use this connector natively in Elastic Cloud, satisfy all <>. - -[discrete#es-connectors-google-drive-usage] -===== Usage - -To use this connector natively in Elastic Cloud, see <>. - -For additional operations, see <>. - -[discrete#es-connectors-google-drive-connector-authentication-prerequisites] -===== Connector authentication prerequisites - -Before syncing any data from Google Drive, you need to create a https://cloud.google.com/iam/docs/service-account-overview[service account^] with appropriate access to Google Drive API. - -To get started, log into https://cloud.google.com[Google Cloud Platform^] and go to the `Console`. - -. *Create a Google Cloud Project.* Give your project a name, change the project ID and click the Create button. - -. *Enable Google APIs.* Choose APIs & Services from the left menu and click on `Enable APIs and Services`. You need to enable the *Drive API*. - -. *Create a Service Account.* In the `APIs & Services` section, click on `Credentials` and click on `Create credentials` to create a service account. Give your service account a name and a service account ID. This is like an email address and will be used to identify your service account in the future. Click `Done` to finish creating the service account. Your service account needs to have access to at least the following scope: -- `https://www.googleapis.com/auth/drive.readonly` - -. 
*Create a Key File*. - - In the Cloud Console, go to `IAM and Admin` > `Service accounts` page. - - Click the email address of the service account that you want to create a key for. - - Click the `Keys` tab. Click the `Add key` drop-down menu, then select `Create new key`. - - Select JSON as the Key type and then click `Create`. This will download a JSON file that will contain the service account credentials. - -. *[Optional] Share Google Drive Folders.* If you use domain-wide delegation for syncing data you can skip this step. Go to your Google Drive. Right-click the folder or shared drive, choose `Share` and add the email address of the service account you created in step 3. as a viewer to this folder. - -[NOTE] -==== -When you grant a service account access to a specific folder or shared drive in Google Drive, it's important to note that the permissions extend to all the children within that folder or drive. -This means that any folders or files contained within the granted folder or drive inherit the same access privileges as the parent. -==== - -[discrete#es-connectors-google-drive-additional-prerequisites-for-domain-wide-delegation] -====== Additional authentication prerequisites for domain-wide delegation - -This step is *required* when *Use domain-wide delegation for data sync* or *Enable document level security* configuration option is enabled. - -. *Enable Google APIs*. -+ -Choose APIs & Services from the left menu and click on `Enable APIs and Services`. You need to enable the *Admin SDK API* and *Drive API*. - -. *Google Workspace domain-wide delegation of authority*. -+ -To access drive and user data in a Google Workspace domain, the service account that you created needs to be granted access by a super administrator for the domain. You can follow https://developers.google.com/cloud-search/docs/guides/delegation[the official documentation^] to perform Google Workspace domain-wide delegation of authority. -+ -You need to grant the following *OAuth Scopes* to your service account: -+ --- -* `https://www.googleapis.com/auth/admin.directory.group.readonly` -* `https://www.googleapis.com/auth/admin.directory.user.readonly` -* `https://www.googleapis.com/auth/drive.readonly` -* `https://www.googleapis.com/auth/drive.metadata.readonly` --- -+ -This step allows the connector to: - -* access user data and their group memberships in a Google Workspace organization -* access Google Drive data in drives associated to Google Workspace members - -[discrete#es-connectors-google-drive-configuration] -===== Configuration - -The following configuration fields are required: - -Google Drive service account JSON:: -The service account credentials generated from Google Cloud Platform (JSON string). -Refer to the https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account[Google Cloud documentation^] for more information. - -Enable document level security:: -Toggle to enable <>. -DLS is supported for the Google Drive connector. -When enabled: -+ -* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -* Access control syncs will fetch users' access control lists and store them in a separate index. - -Google Workspace admin email:: -Google Workspace admin email. -Required to enable document level security (DLS) or domain-wide delegation for data sync. 
-A service account with delegated authority can impersonate an admin user with permissions to access Google Workspace user data and their group memberships. -Refer to the https://support.google.com/a/answer/162106?hl=en[Google Cloud documentation^] for more information. - -[discrete#es-connectors-google-drive-documents-and-syncs] -===== Documents and syncs - -The connector will fetch all files and folders the service account has access to. - -It will attempt to extract the content from Google Suite documents (Google Docs, Google Sheets and Google Slides) and regular files. - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permissions are not synced by default. -You must first enable <>. -Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-google-drive-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-google-drive-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are not available for this connector in the present version. -Currently filtering is controlled via ingest pipelines. - -[discrete#es-connectors-google-drive-document-level-security] -===== Document level security - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. -Refer to <> on this page for how to enable DLS for this connector. - -[NOTE] -==== -Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. -The example uses SharePoint Online as the data source, but the same steps apply to every connector. -==== - -[discrete#es-connectors-google-drive-content-extraction] -===== Content extraction - -See <> for more information. - -[discrete#es-connectors-google-drive-known-issues] -===== Known issues - -There are currently no known issues for this connector. - -[discrete#es-connectors-google-drive-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-google-drive-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-google-drive-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-google-drive-client-availability-and-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. To use this connector, satisfy all <>. - -[discrete#es-connectors-google-drive-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, see <> -For additional usage operations, see <>. - -[discrete#es-connectors-google-drive-client-connector-authentication-prerequisites] -===== Connector authentication prerequisites - -Before syncing any data from Google Drive, you need to create a https://cloud.google.com/iam/docs/service-account-overview[service account^] with appropriate access to Google Drive API. - -To get started, log into https://cloud.google.com[Google Cloud Platform^] and go to the `Console`. - -. 
*Create a Google Cloud Project.* Give your project a name, change the project ID and click the Create button. - -. *Enable Google APIs.* Choose APIs & Services from the left menu and click on `Enable APIs and Services`. You need to enable the *Drive API*. - -. *Create a Service Account.* In the `APIs & Services` section, click on `Credentials` and click on `Create credentials` to create a service account. Give your service account a name and a service account ID. This is like an email address and will be used to identify your service account in the future. Click `Done` to finish creating the service account. Your service account needs to have access to at least the following scope: -- `https://www.googleapis.com/auth/drive.readonly` - -. *Create a Key File*. - - In the Cloud Console, go to `IAM and Admin` > `Service accounts` page. - - Click the email address of the service account that you want to create a key for. - - Click the `Keys` tab. Click the `Add key` drop-down menu, then select `Create new key`. - - Select JSON as the Key type and then click `Create`. This will download a JSON file that will contain the service account credentials. - -. *[Optional] Share Google Drive Folders.* If you use domain-wide delegation for syncing data you can skip this step. Go to your Google Drive. Right-click the folder or shared drive, choose `Share` and add the email address of the service account you created in step 3. as a viewer to this folder. - -[NOTE] -==== -When you grant a service account access to a specific folder or shared drive in Google Drive, it's important to note that the permissions extend to all the children within that folder or drive. -This means that any folders or files contained within the granted folder or drive inherit the same access privileges as the parent. -==== - -[discrete#es-connectors-google-drive-client-additional-prerequisites-for-domain-wide-delegation] -====== Additional authentication prerequisites for domain-wide delegation - -This step is *required* when *Use domain-wide delegation for data sync* or *Enable document level security* configuration option is enabled. - -. *Enable Google APIs*. -+ -Choose APIs & Services from the left menu and click on `Enable APIs and Services`. You need to enable the *Admin SDK API* and *Drive API*. - -. *Google Workspace domain-wide delegation of authority*. -+ -To access drive and user data in a Google Workspace domain, the service account that you created needs to be granted access by a super administrator for the domain. You can follow https://developers.google.com/cloud-search/docs/guides/delegation[the official documentation^] to perform Google Workspace domain-wide delegation of authority. -+ -You need to grant the following *OAuth Scopes* to your service account: -+ --- -* `https://www.googleapis.com/auth/admin.directory.group.readonly` -* `https://www.googleapis.com/auth/admin.directory.user.readonly` -* `https://www.googleapis.com/auth/drive.readonly` -* `https://www.googleapis.com/auth/drive.metadata.readonly` --- -+ -This step allows the connector to: - -* access user data and their group memberships in a Google Workspace organization -* access Google Drive data in drives associated to Google Workspace members - -[discrete#es-connectors-google-drive-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/google_drive.py[connector source code^]. 
-These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required: - -`service_account_credentials`:: -The service account credentials generated from Google Cloud Platform (JSON string). -Refer to the https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account[Google Cloud documentation^] for more information. - -`use_domain_wide_delegation_for_sync`:: -Use https://developers.google.com/cloud-search/docs/guides/delegation[domain-wide delegation] to automatically sync content from all shared and personal drives in the Google workspace. -This eliminates the need to manually share Google Drive data with your service account, though it may increase the sync time. -If disabled, only items and folders manually shared with the service account will be synced. - -`google_workspace_admin_email_for_data_sync`:: -Required when domain-wide delegation for data sync is enabled. -This email is used for discovery and syncing of shared drives. Only the shared drives this user has access to are synced. - -`google_workspace_email_for_shared_drives_sync`:: -Required when domain-wide delegation for data sync is enabled. -Provide the Google Workspace user email for discovery and syncing of shared drives. Only the shared drives this user has access to will be synced. - -`use_document_level_security`:: -Toggle to enable <>. -DLS is supported for the Google Drive connector. -When enabled: -+ -* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -* Access control syncs will fetch users' access control lists and store them in a separate index. - -`google_workspace_admin_email`:: -Google Workspace admin email. -Required to enable document level security (DLS) or domain-wide delegation for data sync. -A service account with delegated authority can impersonate an admin user with permissions to access Google Workspace user data and their group memberships. -Refer to the https://support.google.com/a/answer/162106?hl=en[Google Cloud documentation^] for more information. - -`max_concurrency`:: -The maximum number of concurrent HTTP requests to the Google Drive API. -Increasing this value can improve data retrieval speed, but it may also place higher demands on system resources and network bandwidth. - -`use_text_extraction_service`:: -Requires a separate deployment of the <>. -Requires that pipeline settings disable text extraction. -Default value is `False`. - -[discrete#es-connectors-google-drive-client-deployment-using-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-google-drive-client-documents-and-syncs] -===== Documents and syncs - -The connector will fetch all files and folders the service account has access to. - -It will attempt to extract the content from Google Suite documents (Google Docs, Google Sheets and Google Slides) and regular files. - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted -* Permissions are not synced by default. -You must first enable <>. -Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. 
-==== - -[discrete#es-connectors-google-drive-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-google-drive-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are not available for this connector in the present version. -Currently filtering is controlled via ingest pipelines. - -[discrete#es-connectors-google-drive-client-document-level-security] -===== Document level security - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. -Refer to <> on this page for how to enable DLS for this connector. - -[NOTE] -==== -Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. -The example uses SharePoint Online as the data source, but the same steps apply to every connector. -==== - -[discrete#es-connectors-google-drive-client-content-extraction] -===== Content extraction - -See <> for more information. - -[discrete#es-connectors-google-drive-client-end-to-end-testing] -===== End-to-end testing - -The connector framework enables operators to run functional tests against a real data source. Refer to <> for more details. - -To perform E2E testing for the Google Drive connector, run the following command: - -[source,shell] ----- -make ftest NAME=google_drive ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=google_drive DATA_SIZE=small ----- - -[discrete#es-connectors-google-drive-client-known-issues] -===== Known issues - -There are currently no known issues for this connector. - -[discrete#es-connectors-google-drive-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-google-drive-client-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-graphql.asciidoc b/docs/reference/connector/docs/connectors-graphql.asciidoc deleted file mode 100644 index bc7083d482e79..0000000000000 --- a/docs/reference/connector/docs/connectors-graphql.asciidoc +++ /dev/null @@ -1,248 +0,0 @@ -[#es-connectors-graphql] -=== Elastic GraphQL connector reference -++++ -GraphQL -++++ - -// Attributes used in this file -:service-name: GraphQL -:service-name-stub: graphql - -The Elastic GraphQL connector is written in Python using the https://github.com/elastic/connectors/tree/main[Elastic connector framework]. View the https://github.com/elastic/connectors/blob/main/connectors/sources/graphql.py[source code for this connector]. - -[discrete#es-connectors-graphql-connector-availability-and-prerequisites] -==== Availability and prerequisites - -This connector was introduced in Elastic *8.14.0*, available as a *self-managed* self-managed connector. - -To use this connector, satisfy all <>. -Importantly, you must deploy the connectors service on your own infrastructure. -You have two deployment options: - -* <>. Use this option if you're comfortable working with Python and want to iterate quickly locally. -* <>. Use this option if you want to deploy the connectors to a server, or use a container orchestration platform. - -[NOTE] -==== -This connector is in *technical preview* and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. 
Technical preview features are not subject to the support SLA of official GA features. -==== - -[discrete#es-connectors-graphql-connector-usage] -==== Usage - -To set up this connector in the UI, select the *GraphQL* tile when creating a new connector under *Search -> Connectors*. - -If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs]. - -For additional operations, see <>. - -[discrete#es-connectors-graphql-connector-docker] -==== Deploy with Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-graphql-connector-configuration] -==== Configuration - -[discrete#es-connectors-graphql-connector-configure-graphql-connector] -===== Configure GraphQL connector - -Note the following configuration fields: - -`http_endpoint` (required):: -Base URL of the GraphQL endpoint. -*Example*: `https://api.xyz.com/graphql` - -`http_method` (required):: -`GET` or `POST`. - -`authentication_method`(required):: -Select from `No Auth`, `Basic Auth`, and `Bearer Token`. - -`username`:: -Required when using basic authentication. - -`password`:: -Required when using basic authentication. - -`token`:: -Required when using bearer token authentication. - -`graphql_query` (required):: -Query used to fetch data from the source. -Can contain variables provided in the `graphql_variables` field. -The connector will substitute the variables in the query with values from `graphql_variables` and make a GraphQL query to the source. -+ -*Example*: -+ -[source,js] ----- -query getUser($id: ID!) { - user(id: $id) { - name - email - } -} ----- -// NOTCONSOLE - -`graphql_variables`:: -A JSON object of key/value pairs containing variables used in the GraphQL query. -The connector will substitute the variables in the query with the values provided here and make a GraphQL query to the source. -+ -*Example*: -+ -For the GraphQL query `query getUser($id: ID!) { user(id: $id) { name } }` -+ -* Where the value of `graphql_variables` is `{"id": "123"}` -* The connector will execute `query getUser { user(id: "123") { name } }` to fetch data from the source - -`graphql_object_to_id_map` (required):: -A JSON mapping between GraphQL response objects to index and their ID fields. -The connector will fetch data for each object (JSON key) and use the provided ID field (JSON value) to index the object into Elasticsearch. -The connector will index all fields for each object specified in the mapping. -Use dot `(.)` notation to specify the full path from the root of the GraphQL response to the desired object. -+ -*Example*: -+ -The GraphQL query `query getUser { organization { users{ user_id name email} } }` fetches all available users from the source. -To index every user as a separate document configure this field as below. -+ -[source,js] ----- -{ - "organization.users": "user_id" -} ----- -// NOTCONSOLE -+ -In this example `user_id` is unique in every user document. Therefore, we set `user_id` as the value for `organization.users`. -+ -[NOTE] -==== -The path provided in this field should only contain JSON objects and not lists. -==== - -`headers`:: -JSON object containing custom headers to be sent with each GraphQL request: -+ -[source,js] ----- -{ - "content-type": "Application/json" -} ----- -// NOTCONSOLE - -`pagination_model` (required):: -This field specifies the pagination model to be used by the connector. -The connector supports `No pagination` and `Cursor-based pagination` pagination models. 
-+ -For cursor-based pagination, add `pageInfo {endCursor hasNextPage}` and an `after` argument variable in your query at the desired node (`Pagination key`). -Use the `after` query argument with a variable to iterate through pages. -The default value for this field is `No pagination`. Example: -+ -For `Cursor-based pagination`, the query should look like this example: -+ -[source,js] ----- -query getUsers($cursor: String!) { - sampleData { - users(after: $cursor) { - pageInfo { - endCursor - hasNextPage - } - nodes { - first_name - last_name - address - } - } - } -} ----- -// NOTCONSOLE -+ -The value of `pagination_key` is `sampleData.users` so it must contain: -+ -* `pageInfo {endCursor hasNextPage}` -* the `after` argument with a variable when using cursor-based pagination - -`pagination_key` (required):: -Specifies which GraphQL object is used for pagination. -Use `.` to provide the full path of the object from the root of the response. -+ -*Example*: -+ -* `organization.users` - -`connection_timeout`:: -Specifies the maximum time in seconds to wait for a response from the GraphQL source. -Default value is *30 seconds*. - -[discrete#es-connectors-graphql-connector-documents-and-syncs] -==== Documents and syncs - -The connector syncs the objects and entities based on GraphQL Query and GraphQL Object List. - -[discrete#es-connectors-graphql-connector-sync-types] -==== Sync types - -<> are supported by default for all connectors. - -This connector currently does not support <>. - -[discrete#es-connectors-graphql-connector-sync-rules] -==== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-graphql-connector-advanced-sync-rules] -==== Advanced Sync Rules - -Advanced sync rules are not available for this connector in the present version. - -[discrete#es-connectors-graphql-connector-connector-client-operations] -==== Connector Client operations - -[discrete#es-connectors-graphql-connector-end-to-end-testing] -===== End-to-end Testing - -The connector framework enables operators to run functional tests against a real data source, using Docker Compose. -You don't need a running Elasticsearch instance or GraphQL source to run this test. - -Refer to <> for more details. - -To perform E2E testing for the GraphQL connector, run the following command: - -```shell -$ make ftest NAME=graphql -``` -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=graphql DATA_SIZE=small ----- - -By default, `DATA_SIZE=MEDIUM`. - -[discrete#es-connectors-graphql-connector-known-issues] -==== Known issues - -* Every document will be updated in every sync. -* If the same field name exists with different types across different objects, the connector might raise a mapping parser exception. - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-graphql-connector-troubleshooting] -==== Troubleshooting - -See <>. - -[discrete#es-connectors-graphql-connector-security] -==== Security - -See <>. 
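-
-As a closing illustration, here is how the configuration fields described above might be combined for a hypothetical GraphQL source. The field names match the configuration options documented on this page; every value is a placeholder and reuses this page's own `getUser` example:
-
-[source,js]
-----
-{
-  "http_endpoint": "https://api.xyz.com/graphql",
-  "http_method": "POST",
-  "authentication_method": "Bearer Token",
-  "token": "<bearer-token>",
-  "graphql_query": "query getUser { organization { users { user_id name email } } }",
-  "graphql_object_to_id_map": {"organization.users": "user_id"},
-  "headers": {"content-type": "Application/json"},
-  "pagination_model": "No pagination",
-  "connection_timeout": 30
-}
-----
-// NOTCONSOLE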
- diff --git a/docs/reference/connector/docs/connectors-hosted-tutorial-mongo.asciidoc b/docs/reference/connector/docs/connectors-hosted-tutorial-mongo.asciidoc deleted file mode 100644 index 6237a09129965..0000000000000 --- a/docs/reference/connector/docs/connectors-hosted-tutorial-mongo.asciidoc +++ /dev/null @@ -1,193 +0,0 @@ -[#es-mongodb-start] -=== MongoDB managed connector tutorial -++++ -Managed connector tutorial (MongoDB) -++++ - -// Learn how to use the <> to sync data from https://www.mongodb.com/docs/atlas/[MongoDB Atlas^] to an Elastic Cloud deployment. - -This tutorial explains how to set up automatic, ongoing syncs from a MongoDB database to a search-optimized index on Elastic Cloud. -We'll use an Atlas deployment, together with built-in sample datasets in this tutorial, but you can also use your own data. - -This tutorial is an example of: - -* How to use the <>, which is compatible with MongoDB Atlas and on premises MongoDB servers. -See <>. -* How to use any connector available as a <>. -The workflow in this tutorial applies to all Elastic managed connectors. - -This tutorial has three main sections: - -* <>: First you'll need to get *MongoDB Atlas* up and running. -(*Skip this step* if you already have a MongoDB instance you'd like to use.) -** You'll create a free account, set up a free Atlas cluster, and load some sample data. -* <> Once that's done, you'll need to gather some details about your Atlas cluster, so you can connect it to an Elastic Cloud deployment. -* <>: Next, you'll need to get *Elastic Cloud* up and running. -** Then you'll need to create an Elasticsearch index and configure the Elastic connector to interface with your Atlas cluster. -We'll do all this in the Kibana UI. -** Once configured, you'll set a syncing schedule to start indexing your MongoDB data into Elasticsearch and ensure it stays up to date. - -[discrete#es-mongodb-start-atlas-setup] -== Set up MongoDB Atlas - -Follow the steps in the MongoDB documentation to https://www.mongodb.com/docs/atlas/getting-started[create a free Atlas account^]: - -* Create and deploy a free Atlas cluster. -* Under *Security > Network Access*, add IP access to `0.0.0.0/0`. -This CIDR-notation allows connections from any IP address. -This enables the Elastic connector, running on Elastic Cloud, to access Atlas. -See https://www.mongodb.com/docs/atlas/security/add-ip-address-to-list/[the Atlas documentation^] for complete instructions. -* Create a *database user*, with a username and password combination. -Select *Add new database user* in *Security > Database access*. -Keep these details handy, as you'll need them to configure the connector later. - -[discrete#es-mongodb-start-load-sample-data] -=== Load sample data into Atlas - -In this example we'll use the sample data available to MongoDB Atlas. -You can do this in the MongoDB Atlas UI. - -Use the *Load Sample Dataset* button in the Atlas UI, under *Database Deployments*. -Find this by selecting the *"..."* button next to your cluster name. - -.Loading sample data in Atlas UI -image::images/mongodb-load-sample-data.png[Load sample data in Atlas UI] - -Detailed instructions are available in the https://www.mongodb.com/docs/atlas/sample-data[Atlas documentation^]. - -[discrete#es-mongodb-start-view-sample-data] -=== Browse sample data in Atlas UI - -Once loaded, you can view your sample data in the Atlas UI, by selecting your database deployment's *Browse Collections* button. -Confirm that the sample databases have been added to your database deployment. 
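-
-If you prefer a terminal, you can also spot-check the sample data with `mongosh` before moving on. For example, the following pulls one document from the `comments` collection of the `sample_mflix` database, which is the collection this tutorial focuses on (connect with your own Atlas connection string and credentials first):
-
-[source,js]
-----
-// Run inside mongosh after connecting to your Atlas cluster
-db.getSiblingDB("sample_mflix").comments.findOne();
-----
-// NOTCONSOLE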
- -In this example, we'll use the https://www.mongodb.com/docs/atlas/sample-data/sample-mflix/[`sample_mflix`^] dataset, which contains data on movies and movie theaters. -The database contains collections for certain metadata, including users and comments on specific movies. -We'll focus on the `comments` collection in this example. -Each document contains a comment, and information such as the commenter's name and email address. - -.A sample MongoDB document from the `comments` collection -image::images/mongodb-sample-document.png[Sample document from the comments collection] - -Later, once the connector transforms MongoDB documents into Elasticsearch documents, you can compare their structure. - -We've added data to our MongoDB Atlas cluster, and now we need to configure the Elastic MongoDB connector. - -[discrete#es-mongodb-start-gather-details] -== Gather details about your MongoDB instance - -Before we switch over to working in Elastic Cloud, we need to gather some details about our MongoDB Atlas cluster. -We'll need these details to configure the Elastic MongoDB connector. -You can find these details in the Atlas UI. - -Find the following details: - -* *Host*: The URI of your MongoDB Atlas cluster. -This should look like `mongodb+srv://.hjksqfc.mongodb.net`. -Find this by https://www.mongodb.com/docs/atlas/tutorial/connect-to-your-cluster/#connect-to-your-atlas-cluster[connecting to your cluster^] in the MongoDB Atlas UI. -** One way to find this URI is to select *Connect with MongoDB Shell* and copy the connection string from the CLI instructions. -* *Database*: The name of the database you want to sync. -In this example, we'll use the `sample_mflix` database. -* *Collection*: The name of the collection you want to sync. -In this example, we'll use the `comments` collection of the `sample_mflix` database. -* *Username*: The username you created earlier, in the setup phase. -* *Password*: The password you created earlier. - -Keep these details handy! - -[discrete#es-mongodb-start-elastic-cloud] -== Set up Elastic Cloud - -Everything is set up in MongoDB Atlas and we have the details we need to configure the Elastic MongoDB connector. -First we'll need to get an Elastic Cloud deployment up and running. - -[discrete#es-mongodb-start-create-deployment] -=== Create an {ecloud} deployment - -[NOTE] -==== -This step is for users who are new to Elastic Cloud. -Skip this step if your team already has an Elastic Cloud deployment. -==== - -Log in to https://cloud.elastic.co/[Elastic Cloud^], and use the UI to create a deployment. -You'll need to run version *8.5.0* or later. - -Read <> for full details. - -Once your deployment is created, navigate to *Search*. - -[discrete#es-mongodb-start-create-index] -=== Create an Elasticsearch index - -The Elastic connector will sync your MongoDB data into a search-optimized Elasticsearch index. -The first step is to create your index in the Kibana UI. - -In the main menu, navigate to *Search > Content > Indices*, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. - -Follow these steps to create your index: - -* Select *Create an Elasticsearch index*. -* Choose *Connector* as your ingestion method. -* Select the *MongoDB* connector type. -* Name your new index, for example `search-mongo-sample`, then save. -This takes you to the *Configuration* tab of your index overview page. - -Next we need to input our Atlas details to configure the connector. 
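-
-Laid out in one place, the values gathered earlier map onto the connector configuration roughly as follows. The keys below are illustrative labels rather than the connector's exact internal field names, and every value is a placeholder:
-
-[source,js]
-----
-{
-  "host": "mongodb+srv://<cluster-name>.mongodb.net",
-  "username": "<database-user>",
-  "password": "<database-password>",
-  "database": "sample_mflix",
-  "collection": "comments",
-  "direct_connection": false,
-  "ssl_enabled": true
-}
-----
-// NOTCONSOLE
-
-The last two values anticipate the next step, where *Direct connection* is set to `false` and the *SSL/TLS Connection* option is toggled on.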
- -[discrete#es-mongodb-start-configure-connector] -=== Configure the MongoDB connector - -Using the <>, configure the MongoDB connector. -Enter the details under the *Configuration* step. - -Set the *Direct connection* option to `false` for this example. -You must enable SSL/TLS for MongoDB Atlas, so toggle on the *SSL/TLS Connection* option. - -.Example configuration for the MongoDB connector -image::images/mongodb-connector-config.png[Example configuration for the MongoDB connector, width=350] - -Once you've entered these details, select *Save configuration*. - -[discrete#es-mongodb-start-launch-sync] -=== Begin syncing - -Once you've configured your MongoDB connector, it's time to schedule a sync. - -The UI will take you to the *Scheduling* tab of your index overview page. -We'll schedule a recurring sync for this example, which will run every day at midnight. - -In the *Scheduling* tab: - -* Toggle *Enable recurring syncs with the following schedule*. -* Select *Frequency*, "Every" `day`. -* Select *Time*, "At" `00:00`. -* *Save* this sync schedule. - -Once you save your sync schedule, the connector will start syncing your MongoDB Atlas data into Elasticsearch. - -[discrete#es-mongodb-start-verify-documents] -=== Verify documents - -[TIP] -==== -Our <> explains how documents in your MongoDB database and collection are extracted and transformed into documents in your Elasticsearch index. -==== - -If all the configuration details are correct, the sync will begin and documents will start to appear in your Elasticsearch index. - -As soon as your first documents are synced, you can view the documents and inspect the mapping for the index: - -* In Kibana, navigate to *Search* > *Content* > *Indices* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. -* Select your index, for example `search-mongo-sample`. -* Choose the *Documents* tab to view the synced documents. -Expand a document to view its fields. - -[discrete#es-mongodb-start-learn-more] -== Learn more - -* Refer to the <> for detailed information about the connector, including how *sync rules* work. -* For an overview of all Elastic managed connectors, see <>. -* Learn about <> for Elastic managed connectors. -* Learn about {ref}/ingest-pipeline-search.html[ingest pipelines for Search indices] -* Refer to the official https://www.mongodb.com/docs/atlas/[MongoDB Atlas documentation^] for MongoDB-specific questions. diff --git a/docs/reference/connector/docs/connectors-jira.asciidoc b/docs/reference/connector/docs/connectors-jira.asciidoc deleted file mode 100644 index b11070b1a581a..0000000000000 --- a/docs/reference/connector/docs/connectors-jira.asciidoc +++ /dev/null @@ -1,544 +0,0 @@ -[#es-connectors-jira] -=== Elastic Jira connector reference -++++ -Jira -++++ -// Attributes used in this file -:service-name: Jira -:service-name-stub: jira - -The _Elastic Jira connector_ is a <> for https://www.atlassian.com/software/jira[Atlassian Jira^]. -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. 
-******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-jira-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-jira-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* on Elastic Cloud, as of *8.9.1*. - -[NOTE] -==== -Jira Data Center support was added in 8.13.0 in technical preview and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Technical preview features are not subject to the support SLA of official GA features. -==== - -To use this connector natively in Elastic Cloud, satisfy all <>. - -[discrete#es-connectors-jira-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-jira-usage] -===== Usage - -To use this connector natively in Elastic Cloud, see <>. - -For additional operations, see <>. - -[discrete#es-connectors-jira-compatability] -===== Compatibility - -* Jira Cloud, Jira Server, and Jira Data Center *versions 7 or later*. - -[discrete#es-connectors-jira-configuration] -===== Configuration - -The following configuration fields are required to set up the connector: - -Jira data source:: -Dropdown to determine the Jira platform type: `Jira Cloud`, `Jira Server`, or `Jira Data Center`. Default value is `Jira Cloud`. - -Jira Data Center username:: -The username of the account for Jira Data Center. - -Jira Data Center password:: -The password of the account to be used for Jira Data Center. - -Jira Cloud service account id:: -Email address to authenticate with Jira Cloud. Example: jane.doe@example.com - -Jira Cloud API token:: -The API Token to authenticate with Jira Cloud. - -Jira Server username:: -The username of the account for Jira Server. - -Jira Server password:: -The password of the account to be used for Jira Server. - -Jira Cloud service account id:: -The account email for Jira Cloud. - -Jira Cloud API token:: -The API Token to authenticate with Jira Cloud. - -Jira host url:: -The domain where Jira is hosted. Examples: - -* https://192.158.1.38:8080/ -* https://test_user.atlassian.net/ - -Jira project keys:: -Comma-separated list of https://support.atlassian.com/jira-software-cloud/docs/what-is-an-issue/#Workingwithissues-Projectkeys[Project Keys^] to fetch data from Jira server or cloud. If the value is `*` the connector will fetch data from all projects present in the configured projects. Default value is `*`. Examples: - -* `EC`, `TP` -* `*` - -Enable SSL:: -Whether SSL verification will be enabled. Default value is `False`. - -SSL certificate:: -Content of SSL certificate. Note: In case of `ssl_enabled` is `False`, the `ssl_ca` value will be ignored. Example certificate: -+ -[source, txt] ----- ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -... -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- ----- - -Enable document level security:: -Toggle to enable <>. -When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -Access control syncs fetch users' access control lists and store them in a separate index. 
-+ -[NOTE] -==== -To access user data in Jira Administration, the account you created must be granted *Product Access* for Jira Administration. -This access needs to be provided by an administrator from the http://admin.atlassian.com/[Atlassian Admin], and the access level granted should be `Product Admin`. -==== - -[discrete#es-connectors-jira-documents-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* **Projects** -** Includes metadata such as description, project key, project type, lead name, etc. -* **Issues** -** All types of issues including Task, Bug, Sub-task, Enhancement, Story, etc. -** Includes metadata such as issue type, parent issue details, fix versions, affected versions, resolution, attachments, comments, sub-task details, priority, custom fields, etc. -* **Attachments** - -**Note:** Archived projects and issues are not indexed. - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permissions are not synced by default. -You must first enable <>. -Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-jira-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-jira-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -This connector supports <> for remote filtering. -These rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -[discrete#es-connectors-jira-sync-rules-examples] -====== Advanced sync rules example - -*Example 1*: Queries to index content based on status of Jira issues. - -[source,js] ----- -[ - { - "query": "project = Collaboration AND status = 'In Progress'" - }, - { - "query": "status IN ('To Do', 'In Progress', 'Closed')" - } -] ----- -// NOTCONSOLE - -*Example 2*: Query to index data based on priority of issues for given projects. - -[source,js] ----- -[ - { - "query": "priority in (Blocker, Critical) AND project in (ProjA, ProjB, ProjC)" - } -] ----- -// NOTCONSOLE - -*Example 3*: Query to index data based on assignee and created time. - -[source,js] ----- -[ - { - "query": "assignee is EMPTY and created < -1d" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-jira-document-level-security] -===== Document level security - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. -Refer to <> on this page for how to enable DLS for this connector. - -[WARNING] -==== -Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. This impacts the speed at which your content can be retrieved. -==== - -[WARNING] -==== -When the `data_source` is set to Confluence Data Center or Server, the connector will only fetch 1000 users for access control syncs, due a https://auth0.com/docs/manage-users/user-search/retrieve-users-with-get-users-endpoint#limitations[limitation in the API used^]. -==== - -[NOTE] -==== -Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. 
-The example uses SharePoint Online as the data source, but the same steps apply to every connector. -==== - -[discrete#es-connectors-jira-content-extraction] -===== Content Extraction - -See <>. - -[discrete#es-connectors-jira-known-issues] -===== Known issues - -* *Enabling document-level security impacts performance.* -+ -Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. This impacts the speed at which your content can be retrieved. - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-jira-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-jira-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-jira-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-jira-client-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. -This self-managed connector is compatible with Elastic versions *8.7.0+*. - -[NOTE] -==== -Jira Data Center support was added in 8.13.0 in technical preview and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Technical preview features are not subject to the support SLA of official GA features. -==== - -To use this connector, satisfy all <>. - -[discrete#es-connectors-jira-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-jira-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, see <> -For additional usage operations, see <>. - -[discrete#es-connectors-jira-client-compatability] -===== Compatibility - -* Jira Cloud, Jira Server, and Jira Data Center *versions 7 or later*. - -[discrete#es-connectors-jira-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/jira.py[connector source code^]. -These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required to set up the connector: - -`data_source`:: -Dropdown to determine the Jira platform type: `Jira Cloud`, `Jira Server`, or `Jira Data Center`. Default value is `Jira Cloud`. - -`data_center_username`:: -The username of the account for Jira Data Center. - -`data_center_password`:: -The password of the account to be used for Jira Data Center. - -`username`:: -The username of the account for Jira Server. - -`password`:: -The password of the account to be used for Jira Server. - -`account_email`:: -Email address to authenticate with Jira Cloud. Example: jane.doe@example.com - -`api_token`:: -The API Token to authenticate with Jira Cloud. - -`jira_url`:: -The domain where Jira is hosted. 
Examples: - -* https://192.158.1.38:8080/ -* https://test_user.atlassian.net/ - -`projects`:: -Comma-separated list of https://support.atlassian.com/jira-software-cloud/docs/what-is-an-issue/#Workingwithissues-Projectkeys[Project Keys^] to fetch data from Jira server or cloud. If the value is `*` the connector will fetch data from all projects present in the configured projects. Default value is `*`. Examples: -+ -* `EC`, `TP` -* `*` -+ -[WARNING] -==== -This field can be bypassed by advanced sync rules. -==== - -`ssl_enabled`:: -Whether SSL verification will be enabled. Default value is `False`. - -`ssl_ca`:: -Content of SSL certificate. Note: In case of `ssl_enabled` is `False`, the `ssl_ca` value will be ignored. Example certificate: -+ -[source, txt] ----- ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -... -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- ----- - -`retry_count`:: -The number of retry attempts after failed request to Jira. Default value is 3. - -`concurrent_downloads`:: -The number of concurrent downloads for fetching the attachment content. This speeds up the content extraction of attachments. Defaults to 100. - -`use_document_level_security`:: -Toggle to enable <>. -When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -Access control syncs fetch users' access control lists and store them in a separate index. -+ -[NOTE] -==== -To access user data in Jira Administration, the account you created must be granted *Product Access* for Jira Administration. -This access needs to be provided by an administrator from the http://admin.atlassian.com/[Atlassian Admin], and the access level granted should be `Product Admin`. -==== - -`use_text_extraction_service`:: -Requires a separate deployment of the <>. -Requires that ingest pipeline settings disable text extraction. -Default value is `False`. - -[discrete#es-connectors-jira-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-jira-client-documents-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* **Projects** -** Includes metadata such as description, project key, project type, lead name, etc. -* **Issues** -** All types of issues including Task, Bug, Sub-task, Enhancement, Story, etc. -** Includes metadata such as issue type, parent issue details, fix versions, affected versions, resolution, attachments, comments, sub-task details, priority, custom fields, etc. -* **Attachments** - -**Note:** Archived projects and issues are not indexed. - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted -* Permissions are not synced by default. -You must first enable <>. -Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-jira-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-jira-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -This connector supports <> for remote filtering. -These rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. -Advanced sync rules are defined through a source-specific DSL JSON snippet. 
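-
-Once you have defined an advanced sync rule snippet (see the examples below), you can apply it in {kib} or programmatically with the {ref}/connector-apis.html[Connector APIs].
-The following is a minimal, hypothetical sketch for a connector with the placeholder ID `my-jira-connector`; the exact request body accepted by the update filtering endpoint is an assumption here and can vary between versions, so check the Connector APIs reference before relying on it.
-
-[source,console]
-----
-PUT _connector/my-jira-connector/_filtering
-{
-  "advanced_snippet": {
-    "value": [
-      {
-        "query": "project = Collaboration AND status = 'In Progress'"
-      }
-    ]
-  }
-}
-----
-// TEST[skip:requires connector_id]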
-
-[discrete#es-connectors-jira-client-sync-rules-examples]
-====== Advanced sync rules example
-
-*Example 1*: Queries to index content based on the status of Jira issues.
-
-[source,js]
-----
-[
-  {
-    "query": "project = Collaboration AND status = 'In Progress'"
-  },
-  {
-    "query": "status IN ('To Do', 'In Progress', 'Closed')"
-  }
-]
-----
-// NOTCONSOLE
-
-*Example 2*: Query to index data based on the priority of issues for given projects.
-
-[source,js]
-----
-[
-  {
-    "query": "priority in (Blocker, Critical) AND project in (ProjA, ProjB, ProjC)"
-  }
-]
-----
-// NOTCONSOLE
-
-*Example 3*: Query to index data based on assignee and created time.
-
-[source,js]
-----
-[
-  {
-    "query": "assignee is EMPTY and created < -1d"
-  }
-]
-----
-// NOTCONSOLE
-
-[discrete#es-connectors-jira-client-document-level-security]
-===== Document level security
-
-Document level security (DLS) enables you to restrict access to documents based on a user’s permissions.
-Refer to <> on this page for how to enable DLS for this connector.
-
-[WARNING]
-====
-Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. This impacts the speed at which your content can be retrieved.
-====
-
-[WARNING]
-====
-When the `data_source` is set to Jira Data Center or Server, the connector will only fetch 1000 users for access control syncs, due to a https://auth0.com/docs/manage-users/user-search/retrieve-users-with-get-users-endpoint#limitations[limitation in the API used^].
-====
-
-[NOTE]
-====
-Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application.
-The example uses SharePoint Online as the data source, but the same steps apply to every connector.
-====
-
-[discrete#es-connectors-jira-client-content-extraction]
-===== Content Extraction
-
-See <>.
-
-[discrete#es-connectors-jira-client-connector-client-operations]
-===== Self-managed connector operations
-
-[discrete#es-connectors-jira-client-testing]
-===== End-to-end testing
-
-The connector framework enables operators to run functional tests against a real data source.
-Refer to <> for more details.
-
-To perform E2E testing for the Jira connector, run the following command:
-
-[source,shell]
-----
-make ftest NAME=jira
-----
-
-For faster tests, add the `DATA_SIZE=small` flag:
-
-[source,shell]
-----
-make ftest NAME=jira DATA_SIZE=small
-----
-
-[discrete#es-connectors-jira-client-known-issues]
-===== Known issues
-
-* *Enabling document-level security impacts performance.*
-+
-Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. This impacts the speed at which your content can be retrieved.
-
-Refer to <> for a list of known issues for all connectors.
-
-[discrete#es-connectors-jira-client-troubleshooting]
-===== Troubleshooting
-
-See <>.
-
-[discrete#es-connectors-jira-client-security]
-===== Security
-
-See <>.
- -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-known-issues.asciidoc b/docs/reference/connector/docs/connectors-known-issues.asciidoc deleted file mode 100644 index e8074df9526c6..0000000000000 --- a/docs/reference/connector/docs/connectors-known-issues.asciidoc +++ /dev/null @@ -1,166 +0,0 @@ -[#es-connectors-known-issues] -== Connector known issues -++++ -Known issues -++++ - -[discrete#es-connectors-known-issues-enterprie-search-service] -=== Enterprise Search service: self-managed connectors - -As of *8.10.0* self-managed connectors no longer require the Enterprise Search service to be running on your Elastic deployment. -However, if you are upgrading connectors from versions _earlier than 8.9_, you'll need to run Enterprise Search once to migrate your connectors to the new format. - -Some points to note about this migration: - -* This involves updating system indices that store configuration and sync history for your connectors. -* This is an in-place operation, meaning no temporary or backup indices will be created. -* Therefore, it is important to take a snapshot of the Elasticsearch cluster before upgrading— in the unlikely event of an index migration failure. - -If you have trouble with this migration, please contact support. - -[WARNING] -==== -To run self-managed connectors your self-deployed connector service version must match your Elasticsearch version. -For example, if you're running Elasticsearch 8.10.1, your connector service should be version 8.10.1.x. -Elastic does not support deployments running mismatched versions (except during upgrades). -==== - -[discrete#es-connectors-known-issues-connector-service] -=== Connector service - -The connector service has the following known issues: - -* *OOM errors when syncing large database tables* -+ -Syncs after the initial sync can cause out-of-memory (OOM) errors when syncing large database tables. -This occurs because database connectors load and store IDs in memory. -For tables with millions of records, this can lead to memory exhaustion if the connector service has insufficient RAM. -+ -To mitigate this issue, you can: -+ -** *Increase RAM allocation*: -*** *Elastic Cloud*: Upgrade the Enterprise Search instance to a larger size. Note that for Elastic managed connectors running on Elastic Cloud, the connector service runs on the Enterprise Search node. It only has access to up to 40% of the node’s RAM allocation. -*** *Self-managed*: Increase RAM allocation for the machine/container running the connector service. -+ -.RAM *sizing guidelines* -[%collapsible] -============== -The following table shows the estimated RAM usage for loading IDs into memory. -|=== -| *Number of IDs* | *Memory Usage in MB (2X buffer)* -| 1,000,000 -| ≈ 45.78 MB -| 10,000,000 -| ≈ 457.76 MB -| 50,000,000 -| ≈ 2288.82 MB (≈ 2.29 GB) -| 100,000,000 -| ≈ 4577.64 MB (≈ 4.58 GB) -|=== -============== -+ -** *Optimize* <>: -*** Review and optimize sync rules to filter and reduce data retrieved from the source before syncing. -+ -** *Use a self-managed connector* instead of a managed connector: -*** Because self-managed connectors run on your infrastructure, they are not subject to the same RAM limitations of the Enterprise Search node. - -* *Upgrades from deployments running on versions earlier than 8.9.0 can cause sync job failures* -+ -Due to a bug, the `job_type` field mapping will be missing after upgrading from deployments running on versions earlier than 8.9.0. 
-Sync jobs won't be displayed in the Kibana UI (job history) and the connector service won't be able to start new sync jobs. -*This will only occur if you have previously scheduled sync jobs.* -+ -To resolve this issue, you can manually add the missing field with the following command and trigger a sync job: -+ -[source,console] ----- -PUT .elastic-connectors-sync-jobs-v1/_mapping -{ - "properties": { - "job_type": { - "type": "keyword" - } - } -} ----- -// TEST[skip:TODO] - -* *The connector service will fail to sync when the connector tries to fetch more more than 2,147,483,647 (_2^31-1_) documents from a data source* -+ -A workaround is to manually partition the data to be synced using multiple search indices. -+ -* *Custom scheduling might break when upgrading from version 8.6 or earlier.* -+ -If you encounter the error `'custom_schedule_triggered': undefined method 'each' for nil:NilClass (NoMethodError)`, it means the custom scheduling feature migration failed. -You can use the following manual workaround: -+ -[source,console] ----- -POST /.elastic-connectors/_update/connector-id -{ - "doc": { - "custom_scheduling": {} - } -} ----- -// TEST[skip:TODO] -+ -This error can appear on Connectors or Crawlers that aren't the cause of the issue. -If the error continues, try running the above command for every document in the `.elastic-connectors` index. -+ -* *Connectors upgrading from 8.7 or earlier can be missing configuration fields* -+ -A connector that was created prior to 8.8 can sometimes be missing configuration fields. -This is a known issue for the MySQL connector but could also affect other connectors. -+ -If the self-managed connector raises the error `Connector for has missing configuration fields: , ...`, you can resolve the error by manually adding the missing configuration fields via the Dev Tools. -Only the following two field properties are required, as the rest will be autopopulated by the self-managed connector: -+ -** `type`: one of `str`, `int`, `bool`, or `list` -** `value`: any value, as long as it is of the correct `type` (`list` type values should be saved as comma-separated strings) -+ -[source,console] ----- -POST /.elastic-connectors/_update/connector_id -{ - "doc" : { - "configuration": { - "field_a": { - "type": "str", - "value": "" - }, - "field_b": { - "type": "bool", - "value": false - }, - "field_c": { - "type": "int", - "value": 1 - }, - "field_d": { - "type": "list", - "value": "a,b" - } - } - } -} ----- -// TEST[skip:TODO] -+ -* *Python connectors that upgraded from 8.7.1 will report document volumes in gigabytes (GB) instead of megabytes (MB)* -+ -As a result, true document volume will be under-reported by a factor of 1024. -+ -* *The following Elastic managed connectors will not run correctly on Elastic Cloud in 8.9.0.* -They are still available as self-managed connectors. -** Azure Blob Storage -** Confluence Cloud & Server -** Jira Cloud & Server -** Network drives - -[discrete#es-connectors-known-issues-specific] -=== Individual connector known issues - -Individual connectors may have additional known issues. -Refer to <> for connector-specific known issues. diff --git a/docs/reference/connector/docs/connectors-logs.asciidoc b/docs/reference/connector/docs/connectors-logs.asciidoc deleted file mode 100644 index 9e0000d442120..0000000000000 --- a/docs/reference/connector/docs/connectors-logs.asciidoc +++ /dev/null @@ -1,63 +0,0 @@ -[#es-connectors-logs] -=== Connector logs -++++ -Logs -++++ - -This document describes logs for <> and <>. 
- -[discrete#es-connectors-logs-enable] -==== Enable logs - -[discrete#es-connectors-logs-enable-cloud] -===== Elastic Cloud - -Elastic Cloud users need to {cloud}/ec-enable-logging-and-monitoring.html#ec-enable-logging-and-monitoring-steps[enable logging^] to view connector logs. -Go to *Cloud > Deployment > _your-deployment_ > Logs and metrics* to enable logs. - -Once enabled, <>. - -Configure the `log_level` user setting for Enterprise Search. See {cloud}/ec-manage-enterprise-search-settings.html[Add Enterprise Search user settings^] in the Elastic Cloud documentation. - -[discrete#es-connectors-logs-enable-self-managed] -===== self-managed connectors - -Note that self-managed deployments and self-managed connector logs are written to `STDOUT`. - -Self-managed connectors have the following logging options: - -* Use the `service.log_level` setting in your connector service configuration file to specify the log level for the service. -** Enable `elasticsearch.bulk.enable_operations_logging` to log the result of sending documents to Elasticsearch from connectors, for auditing and debugging. This setting depends on the `service.log_level` and will be logged at `DEBUG` level . -* Use the `elasticsearch.log_level` setting to specify the log level for the Elasticsearch _client_ used by the connector service. - -[discrete#es-connectors-logs-view] -==== View connector logs - -You can view logs in Kibana. - -You can filter by `service.type`: - -- `enterprise-search` -- `connectors` - -[discrete#es-connectors-logs-reference] -==== Logs reference - -Logs use Elastic Common Schema (ECS), without extensions. -See {ecs-ref}[the ECS Reference^] for more information. - -The fields logged are: - -* `@timestamp` -* `log.level` -* `ecs.version` -* `labels.index_date` -* `tags` -* `log.logger` -* `service.type` -* `service.version` -* `process.name` -* `process.pid` -* `process.thread.id` - -See {ref}/logging.html[Logging^] in the Elasticsearch documentation for more information. diff --git a/docs/reference/connector/docs/connectors-managed-service.asciidoc b/docs/reference/connector/docs/connectors-managed-service.asciidoc deleted file mode 100644 index 98144ed74bcfa..0000000000000 --- a/docs/reference/connector/docs/connectors-managed-service.asciidoc +++ /dev/null @@ -1,207 +0,0 @@ -[#es-native-connectors] -== Elastic managed connectors - -.Naming history -**** -Elastic managed connectors were initially known as "native connectors". -You might find this term in older documentation. -**** - -Managed <> are available directly within your Elastic Cloud deployment. -No additional infrastructure is required. - -Managed connectors sync data sources directly to Elasticsearch indices. -Create these indices using the *Connector* workflow within {kib}. - -The following connectors are available as Elastic managed connectors. -Refer to each connector reference for additional information specific to each connector. - -include::_connectors-list-native.asciidoc[] - -[discrete#es-native-connectors-prerequisites] -=== Availability and prerequisites - -Managed connectors were introduced in Elastic version *8.5.0*. - -Your Elastic Cloud deployment must include the following Elastic services: - -* *Elasticsearch* -* *{kib}* -* {enterprise-search-ref}/server.html[*Enterprise Search*] - -Refer to _Native Integrations_ on the https://www.elastic.co/subscriptions/cloud[Elastic subscriptions page], in the *Elastic Search* section for managed connector licensing requirements. 
- -.Elastic Cloud IPs -[sidebar] --- -Using our Elastic managed connectors involves outbound data transfer (egress) from your Elastic Cloud deployment. -If you have IP/firewall rules on your third party service, you'll need to add the Elastic Cloud egress static IP ranges to your service’s configuration. - -Refer to {cloud}/ec-static-ips.html[static IP ranges] in the Elastic Cloud documentation for the latest list of IP addresses. --- - -[discrete#es-native-connectors-usage] -=== Usage in {kib} UI - -Follow the *Connector* workflow in {kib} to select the *Connector* ingestion method. -Choose a data source, create an Elasticsearch index, and configure a managed connector to manage the index. - -[.screenshot] -image::images/use-a-connector-workflow.png[] - -[discrete#es-native-connectors-select-connector] -==== Select a connector - -Choose the data source to sync from the available options and select *Continue*. - -[discrete#es-native-connectors-index] -==== Create index - -Create a new index to be managed by the connector: - -. Name your index and optionally change the language analyzer to match the human language of your data source. -(The index name will be automatically prefixed with `search-`.) -. Select *Create index*. - -The index is created and ready to <>. - -This operation requires: - -. Access to {kib} -. Permission to create or manage the index -. `write` {ref}/security-privileges.html[indices privilege^] for the `.elastic-connectors` index -. `manage_api_key` {ref}/security-privileges.html[cluster privilege^] to allow the API key to be created -. `write_connector_secrets` {ref}/security-privileges.html[cluster privilege^] to allow the API key to be stored as a secret - -[discrete#es-native-connectors-configuration] -==== Configure connector - -Create a new index to be managed by the connector. - -Continue from above, or navigate to the following location within the {kib} UI: - -*Search > Content > Elasticsearch indices* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. - -Choose the index to configure, and then choose the *Configuration* tab. - -Configure the connector: - -. Edit the name and description for the connector. -Your team can use this information to differentiate this index from other connector indices. -(These fields describe the _connector_ and are independent of the Elasticsearch index name.) -. Save your changes. -. Edit the data source configuration. -The fields here vary by connector. -Refer to the documentation for each connector for details (refer to list of Elastic managed connectors, above). -Refer to <> for security considerations. -. Save your changes. - -Optionally choose *Edit sync schedule* to begin <>. - -This operation requires: - -. Access to {kib} -. Permission to create or manage the index -. `write` {ref}/security-privileges.html[indices privilege^] for the `.elastic-connectors` index -. `manage_api_key` {ref}/security-privileges.html[cluster privilege^] to allow the API key to be created -. `write_connector_secrets` {ref}/security-privileges.html[cluster privilege^] to allow the API key to be stored as a secret - -[discrete#es-native-connectors-management] -==== Manage connector - -To manage documents, syncs, sync rules, ingest pipelines, and other connector features, refer to <>. - -[discrete#es-native-connectors-manage-API-keys] -=== Manage API keys - -[NOTE] -==== -API keys for Elastic managed connectors were introduced in Elastic version *8.13.0*. 
-Managed connectors created in earlier versions will not automatically use API keys upon upgrading to *8.13.0*. -Refer to <> for more information. -==== - -Managed connectors communicate with Elasticsearch using API keys. -When managing a managed connector through the Connectors page in the {kib} UI, API key management is handled automatically. -API keys for Elastic managed connectors have the `manage` permission for three indices: - -. The attached index -. The access control (ACL) index used for document level security -. The internal `.elastic-connectors` index. - -Changing the attached index through {kib} will automatically invalidate the existing API key and generate a new one. -If you want to rotate an existing API key, navigate to the *Configuration* tab. -Scroll down to *Manage API key* and select *Generate API key*. -This action will invalidate the previous API key, create a new API key, and update the connector secret. - -API keys for Elastic managed connectors are stored on an internal system index called `.connector-secrets`. -This index can only be written to through API requests by users with the `write_connector-secrets` cluster privilege. -Only the Enterprise Search instance has permission to read from this index. - -Users managing Elastic managed connectors will need the `write_connector_secrets` cluster privilege assigned to their role. -Refer to <> for security considerations. - -[discrete#es-native-connectors-manage-API-keys-programmatically] -==== Manage API keys programmatically - -You can also create and store API keys programmatically. - -.*Expand* the following section for details. -[%collapsible] -=================================== -include::_connectors-create-native-api-key.asciidoc[] -=================================== - -[discrete#es-native-connectors-enabling-API-keys-for-upgraded-connectors] -==== Enabling API keys for upgraded connectors - -Managed connectors created before *8.13.0* do not initially have API keys upon upgrading. -The attached indices of these connectors cannot be changed until the connector has been converted to use an API key. - -.*Expand* the following section for steps on enabling API keys for upgraded Elastic managed connectors. -[%collapsible] -=================================== -. Run the following command in *Dev Tools* to enable API keys for the connector, replacing values where indicated. -+ -[source, console,subs="+attributes"] ----- -POST .elastic-connectors/_update/connector_id -{ - "doc": { - "features": { - "native_connector_api_keys": { - "enabled": true - } - } - } -} ----- -// TEST[skip:requires connector_id] -+ -. Go back to the Connectors page and navigate to the *Configuration* tab. -. Scroll down to *Manage API key* and select *Generate API key*. - -Your managed connector is now using API keys to authorize ingestion into Elasticsearch. -=================================== - -[discrete#es-native-connectors-usage-api] -=== Usage via API - -In 8.12 we introduced a set of {ref}/connector-apis.html[Connector APIs] to create and manage Elastic connectors and sync jobs, along with a https://github.com/elastic/connectors/blob/main/docs/CLI.md[CLI tool]. -Use these tools if you'd like to work with connectors and sync jobs programmatically. - -[discrete#es-native-connectors-example] -=== End-to-end example - -The following example demonstrates how to use a managed connector on Elastic Cloud: <>. 
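-
-To reproduce the core of this workflow programmatically, you can use the {ref}/connector-apis.html[Connector APIs] mentioned above from *Dev Tools*.
-The following is a minimal sketch: the connector ID, index name, and `service_type` are placeholder values, and a managed connector may need additional configuration (for example credentials and API keys) before a sync job will succeed.
-
-[source,console]
-----
-# Create a connector attached to an index (placeholder values)
-PUT _connector/my-connector
-{
-  "index_name": "search-my-index",
-  "name": "My example connector",
-  "service_type": "mongodb"
-}
-
-# Queue a full content sync for that connector
-POST _connector/_sync_job
-{
-  "id": "my-connector",
-  "job_type": "full"
-}
-----
-// TEST[skip:requires a configured connector]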
- -[discrete#es-native-connectors-convert] -=== Convert a managed connector - -You can convert a managed connector to a self-managed connector to be run on your own infrastructure. -You'll find instructions in the UI on the connector index's overview page. - -[WARNING] -==== -Converting a managed connector to a self-managed connector is an irreversible operation! -==== diff --git a/docs/reference/connector/docs/connectors-management.asciidoc b/docs/reference/connector/docs/connectors-management.asciidoc deleted file mode 100644 index 77f8b32cb0b05..0000000000000 --- a/docs/reference/connector/docs/connectors-management.asciidoc +++ /dev/null @@ -1,9 +0,0 @@ -[#es-connectors-management] -== Management topics - -Refer to the following sections: - -* <> -* <> -* <> -* <> \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-mongodb.asciidoc b/docs/reference/connector/docs/connectors-mongodb.asciidoc deleted file mode 100644 index 8e62437507009..0000000000000 --- a/docs/reference/connector/docs/connectors-mongodb.asciidoc +++ /dev/null @@ -1,777 +0,0 @@ -[#es-connectors-mongodb] -=== Elastic MongoDB connector reference -++++ -MongoDB -++++ -// Attributes used in this file -:service-name: MongoDB -:service-name-stub: mongodb - -The _Elastic MongoDB connector_ is a <> for https://www.mongodb.com[MongoDB^] data sources. -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. -******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-mongodb-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-mongodb-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* in Elastic versions *8.5.0 and later*. - -To use this connector natively in Elastic Cloud, satisfy all <>. - -[discrete#es-connectors-mongodb-compatibility] -===== Compatibility - -This connector is compatible with *MongoDB Atlas* and *MongoDB 3.6 and later*. - -The data source and your Elastic deployment must be able to communicate with each other over a network. - -[discrete#es-connectors-mongodb-configuration] -===== Configuration - -Each time you create an index to be managed by this connector, you will create a new connector configuration. -You will need some or all of the following information about the data source. - -Server hostname:: -The URI of the MongoDB host. -Examples: -+ -* `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority` -* `mongodb://127.0.0.1:27017` - -Username:: -The MongoDB username the connector will use. -+ -The user must have access to the configured database and collection. -You may want to create a dedicated, read-only user for each connector. - -Password:: -The MongoDB password the connector will use. - -Database:: -The MongoDB database to sync. 
-The database must be accessible using the configured username and password. - -Collection:: -The MongoDB collection to sync. -The collection must exist within the configured database. -The collection must be accessible using the configured username and password. - -Direct connection:: -Toggle to use the https://www.mongodb.com/docs/ruby-driver/current/reference/create-client/#direct-connection[direct connection option for the MongoDB client^]. -Disabled by default. - -SSL/TLS Connection:: -Toggle to establish a secure connection to the MongoDB server using SSL/TLS encryption. -Ensure that your MongoDB deployment supports SSL/TLS connections. -*Enable* if your MongoDB cluster uses DNS SRV records (namely MongoDB Atlas users). -+ -Disabled by default. - -Certificate Authority (.pem):: -Specifies the root certificate from the Certificate Authority. -The value of the certificate is used to validate the certificate presented by the MongoDB instance. -[TIP] -==== -Atlas users can leave this blank because https://www.mongodb.com/docs/atlas/reference/faq/security/#which-certificate-authority-signs-mongodb-atlas-tls-certificates-[Atlas uses a widely trusted root CA]. -==== - -Skip certificate verification:: -Skips various certificate validations (if SSL is enabled). -Disabled by default. -[NOTE] -==== -We strongly recommend leaving this option disabled in production environments. -==== - -[discrete#es-connectors-mongodb-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-mongodb-usage] -===== Usage - -To use this connector as a *managed connector*, use the *Connector* workflow. -See <>. - -For additional operations, see <>. - -[discrete#es-connectors-mongodb-example] -===== Example - -An example is available for this connector. -See <>. - -[discrete#es-connectors-mongodb-known-issues] -===== Known issues - -[discrete#es-connectors-mongodb-known-issues-ssl-tls-812] -====== SSL must be enabled for MongoDB Atlas - -* A bug introduced in *8.12.0* causes the connector to fail to sync Mongo *Atlas* urls (`mongo+srv`) unless SSL/TLS is enabled. -// https://github.com/elastic/sdh-enterprise-search/issues/1283#issuecomment-1919731668 - -[discrete#es-connectors-mongodb-known-issues-expressions-and-variables-in-aggregation-pipelines] -====== Expressions and variables in aggregation pipelines - -It's not possible to use expressions like `new Date()` inside an aggregation pipeline. -These expressions won't be evaluated by the underlying MongoDB client, but will be passed as a string to the MongoDB instance. -A possible workaround is to use https://www.mongodb.com/docs/manual/reference/aggregation-variables/[aggregation variables]. 
- -Incorrect (`new Date()` will be interpreted as string): -[source,js] ----- -{ - "aggregate": { - "pipeline": [ - { - "$match": { - "expiresAt": { - "$gte": "new Date()" - } - } - } - ] - } -} ----- -// NOTCONSOLE - -Correct (usage of https://www.mongodb.com/docs/manual/reference/aggregation-variables/#mongodb-variable-variable.NOW[$$NOW]): -[source,js] ----- -{ - "aggregate": { - "pipeline": [ - { - "$addFields": { - "current_date": { - "$toDate": "$$NOW" - } - } - }, - { - "$match": { - "$expr": { - "$gte": [ - "$expiresAt", - "$current_date" - ] - } - } - } - ] - } -} ----- -// NOTCONSOLE - -[discrete#es-connectors-mongodb-known-issues-tls-with-invalid-cert] -====== Connecting with self-signed or custom CA TLS Cert - -Currently, the MongoDB connector does not support working with self-signed or custom CA certs when connecting to your self-managed MongoDB host. - -[WARNING] -==== -The following workaround should not be used in production. -==== - -This can be worked around in development environments, by appending certain query parameters to the configured host. - -For example, if your host is `mongodb+srv://my.mongo.host.com`, appending `?tls=true&tlsAllowInvalidCertificates=true` will allow disabling TLS certificate verification. - -The full host in this example will look like this: - -`mongodb+srv://my.mongo.host.com/?tls=true&tlsAllowInvalidCertificates=true` - -See <> for any issues affecting all connectors. - -[discrete#es-connectors-mongodb-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-mongodb-security] -===== Security - -See <>. - -[discrete#es-connectors-mongodb-syncs] -===== Documents and syncs - -The following describes the default syncing behavior for this connector. -Use <> and {ref}/ingest-pipeline-search.html[ingest pipelines] to customize syncing for specific indices. - -All documents in the configured MongoDB database and collection are extracted and transformed into documents in your Elasticsearch index. - -* The connector creates one *Elasticsearch document* for each MongoDB document in the configured database and collection. -* For each document, the connector transforms each MongoDB field into an *Elasticsearch field*. -* For each field, Elasticsearch {ref}/dynamic-mapping.html[dynamically determines the *data type*^]. - -This results in Elasticsearch documents that closely match the original MongoDB documents. - -The Elasticsearch mapping is created when the first document is created. - -Each sync is a "full" sync. -For each MongoDB document discovered: - -* If it does not exist, the document is created in Elasticsearch. -* If it already exists in Elasticsearch, the Elasticsearch document is replaced and the version is incremented. -* If an existing Elasticsearch document no longer exists in the MongoDB collection, it is deleted from Elasticsearch. -* Embedded documents are stored as an `object` field in the parent document. - -This is recursive, because embedded documents can themselves contain embedded documents. - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted -* Permissions are not synced. All documents indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-mongodb-sync-rules] -===== Sync rules - -The following sections describe <> for this connector. - -<> are identical for all connectors and are available by default. - -<> for MongoDB can be used to express either `find` queries or aggregation pipelines. 
-They can also be used to tune options available when issuing these queries/pipelines. - -[discrete#es-connectors-mongodb-sync-rules-find] -====== `find` queries - -[NOTE] -==== -You must create a https://www.mongodb.com/docs/current/core/indexes/index-types/index-text/[text index^] on the MongoDB collection in order to perform text searches. -==== - -For `find` queries, the structure of this JSON DSL should look like: - -[source,js] ----- -{ - "find":{ - "filter": { - // find query goes here - }, - "options":{ - // query options go here - } - } -} - ----- -// NOTCONSOLE - -For example: - -[source,js] ----- -{ - "find": { - "filter": { - "$text": { - "$search": "garden", - "$caseSensitive": false - } - }, - "skip": 10, - "limit": 1000 - } -} ----- -// NOTCONSOLE - -`find` queries also support additional options, for example the `projection` object: - -[source,js] ----- -{ - "find": { - "filter": { - "languages": [ - "English" - ], - "runtime": { - "$gt":90 - } - }, - "projection":{ - "tomatoes": 1 - } - } -} ----- -// NOTCONSOLE -Where the available options are: - -* `allow_disk_use` (true, false) — When set to true, the server can write temporary data to disk while executing the find operation. This option is only available on MongoDB server versions 4.4 and newer. -* `allow_partial_results` (true, false) — Allows the query to get partial results if some shards are down. -* `batch_size` (Integer) — The number of documents returned in each batch of results from MongoDB. -* `filter` (Object) — The filter criteria for the query. -* `limit` (Integer) — The max number of docs to return from the query. -* `max_time_ms` (Integer) — The maximum amount of time to allow the query to run, in milliseconds. -* `no_cursor_timeout` (true, false) — The server normally times out idle cursors after an inactivity period (10 minutes) to prevent excess memory use. Set this option to prevent that. -* `projection` (Array, Object) — The fields to include or exclude from each doc in the result set. If an array, it should have at least one item. -* `return_key` (true, false) — Return index keys rather than the documents. -* `show_record_id` (true, false) — Return the `$recordId` for each doc in the result set. -* `skip` (Integer) — The number of docs to skip before returning results. - -[discrete#es-connectors-mongodb-sync-rules-aggregation] -====== Aggregation pipelines - -Similarly, for aggregation pipelines, the structure of the JSON DSL should look like: - -[source,js] ----- -{ - "aggregate":{ - "pipeline": [ - // pipeline elements go here - ], - "options": { - // pipeline options go here - } - } -} ----- -// NOTCONSOLE - -Where the available options are: - -* `allowDiskUse` (true, false) — Set to true if disk usage is allowed during the aggregation. -* `batchSize` (Integer) — The number of documents to return per batch. -* `bypassDocumentValidation` (true, false) — Whether or not to skip document level validation. -* `collation` (Object) — The collation to use. -* `comment` (String) — A user-provided comment to attach to this command. -* `hint` (String) — The index to use for the aggregation. -* `let` (Object) — Mapping of variables to use in the pipeline. See the server documentation for details. -* `maxTimeMs` (Integer) — The maximum amount of time in milliseconds to allow the aggregation to run. 
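-
-For instance, a complete advanced sync rule that combines a pipeline with options could look like the following.
-This is an illustrative sketch only: the `runtime`, `title`, and `year` fields are assumed to exist in your collection, and the option values are arbitrary.
-
-[source,js]
-----
-{
-  "aggregate": {
-    "pipeline": [
-      {
-        "$match": {
-          "runtime": {
-            "$gt": 90
-          }
-        }
-      },
-      {
-        "$project": {
-          "title": 1,
-          "year": 1
-        }
-      }
-    ],
-    "options": {
-      "batchSize": 1000,
-      "maxTimeMs": 60000
-    }
-  }
-}
-----
-// NOTCONSOLE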
- -[discrete#es-connectors-mongodb-migration-from-ruby] -===== Migrating from the Ruby connector framework - -As part of the 8.8.0 release the MongoDB connector was moved from the {connectors-python}[Ruby connectors framework^] to the {connectors-python}[Elastic connector framework^]. - -This change introduces minor formatting modifications to data ingested from MongoDB: - -1. Nested object id field name has changed from "_id" to "id". For example, if you had a field "customer._id", this will now be named "customer.id". -2. Date format has changed from `YYYY-MM-DD'T'HH:mm:ss.fff'Z'` to `YYYY-MM-DD'T'HH:mm:ss` - -If your MongoDB connector stopped working after migrating from 8.7.x to 8.8.x, read the workaround outlined in <>. -If that does not work, we recommend deleting the search index attached to this connector and re-creating a MongoDB connector from scratch. - - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-mongodb-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-mongodb-client-prerequisites] -===== Availability and prerequisites - -This connector is also available as a *self-managed connector* from the *Elastic connector framework*. -To use this connector as a self-managed connector, satisfy all <>. - -[discrete#es-connectors-mongodb-client-compatibility] -===== Compatibility - -This connector is compatible with *MongoDB Atlas* and *MongoDB 3.6 and later*. - -The data source and your Elastic deployment must be able to communicate with each other over a network. - -[discrete#es-connectors-mongodb-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/jira.py[connector source code^]. -These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required to set up the connector: - -`host`:: -The URI of the MongoDB host. -Examples: -+ -* `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority` -* `mongodb://127.0.0.1:27017` - - -`user`:: -The MongoDB username the connector will use. -+ -The user must have access to the configured database and collection. -You may want to create a dedicated, read-only user for each connector. - -`password`:: -The MongoDB password the connector will use. - -[NOTE] -==== -Anonymous authentication is supported for _testing purposes only_, but should not be used in production. -Omit the username and password, to use default values. -==== - -`database`:: -The MongoDB database to sync. -The database must be accessible using the configured username and password. - -`collection`:: -The MongoDB collection to sync. -The collection must exist within the configured database. -The collection must be accessible using the configured username and password. - -`direct_connection`:: -Whether to use the https://www.mongodb.com/docs/ruby-driver/current/reference/create-client/#direct-connection[direct connection option for the MongoDB client^]. -Default value is `False`. 
- -`ssl_enabled`:: -Whether to establish a secure connection to the MongoDB server using SSL/TLS encryption. -Ensure that your MongoDB deployment supports SSL/TLS connections. -*Enable* if your MongoDB cluster uses DNS SRV records (namely MongoDB Atlas users). -+ -Default value is `False`. - -`ssl_ca`:: -Specifies the root certificate from the Certificate Authority. -The value of the certificate is used to validate the certificate presented by the MongoDB instance. -[TIP] -==== -Atlas users can leave this blank because https://www.mongodb.com/docs/atlas/reference/faq/security/#which-certificate-authority-signs-mongodb-atlas-tls-certificates-[Atlas uses a widely trusted root CA]. -==== - -`tls_insecure`:: -Skips various certificate validations (if SSL is enabled). -Default value is `False`. -[NOTE] -==== -We strongly recommend leaving this option disabled in production environments. -==== - -[discrete#es-connectors-mongodb-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-mongodb-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, see <> -For additional usage operations, see <>. - -[discrete#es-connectors-mongodb-client-example] -===== Example - -An example is available for this connector. -See <>. - -[discrete#es-connectors-mongodb-client-known-issues] -===== Known issues - -[discrete#es-connectors-mongodb-client-known-issues-ssl-tls-812] -====== SSL must be enabled for MongoDB Atlas - -* A bug introduced in *8.12.0* causes the connector to fail to sync Mongo *Atlas* urls (`mongo+srv`) unless SSL/TLS is enabled. -// https://github.com/elastic/sdh-enterprise-search/issues/1283#issuecomment-1919731668 - - -[discrete#es-connectors-mongodb-client-known-issues-expressions-and-variables-in-aggregation-pipelines] -====== Expressions and variables in aggregation pipelines - -It's not possible to use expressions like `new Date()` inside an aggregation pipeline. -These expressions won't be evaluated by the underlying MongoDB client, but will be passed as a string to the MongoDB instance. -A possible workaround is to use https://www.mongodb.com/docs/manual/reference/aggregation-variables/[aggregation variables]. - -Incorrect (`new Date()` will be interpreted as string): -[source,js] ----- -{ - "aggregate": { - "pipeline": [ - { - "$match": { - "expiresAt": { - "$gte": "new Date()" - } - } - } - ] - } -} ----- -// NOTCONSOLE - -Correct (usage of https://www.mongodb.com/docs/manual/reference/aggregation-variables/#mongodb-variable-variable.NOW[$$NOW]): -[source,js] ----- -{ - "aggregate": { - "pipeline": [ - { - "$addFields": { - "current_date": { - "$toDate": "$$NOW" - } - } - }, - { - "$match": { - "$expr": { - "$gte": [ - "$expiresAt", - "$current_date" - ] - } - } - } - ] - } -} ----- -// NOTCONSOLE - -[discrete#es-connectors-mongodb-client-known-issues-tls-with-invalid-cert] -====== Connecting with self-signed or custom CA TLS Cert - -Currently, the MongoDB connector does not support working with self-signed or custom CA certs when connecting to your self-managed MongoDB host. - -[WARNING] -==== -The following workaround should not be used in production. -==== - -This can be worked around in development environments, by appending certain query parameters to the configured host. - -For example, if your host is `mongodb+srv://my.mongo.host.com`, appending `?tls=true&tlsAllowInvalidCertificates=true` will allow disabling TLS certificate verification. 
- -The full host in this example will look like this: - -`mongodb+srv://my.mongo.host.com/?tls=true&tlsAllowInvalidCertificates=true` - -[discrete#es-connectors-mongodb-known-issues-docker-image-fails] -====== Docker image errors out for versions 8.12.0 and 8.12.1 - -A bug introduced in *8.12.0* causes the Connectors docker image to error out if run using MongoDB as its source. -The command line will output the error `cannot import name 'coroutine' from 'asyncio'`. -** This issue is fixed in versions *8.12.2* and *8.13.0*. -** This bug does not affect Elastic managed connectors. - -See <> for any issues affecting all connectors. - -[discrete#es-connectors-mongodb-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-mongodb-client-security] -===== Security - -See <>. - -[discrete#es-connectors-mongodb-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-mongodb-client-syncs] -===== Documents and syncs - -The following describes the default syncing behavior for this connector. -Use <> and {ref}/ingest-pipeline-search.html[ingest pipelines] to customize syncing for specific indices. - -All documents in the configured MongoDB database and collection are extracted and transformed into documents in your Elasticsearch index. - -* The connector creates one *Elasticsearch document* for each MongoDB document in the configured database and collection. -* For each document, the connector transforms each MongoDB field into an *Elasticsearch field*. -* For each field, Elasticsearch {ref}/dynamic-mapping.html[dynamically determines the *data type*^]. - -This results in Elasticsearch documents that closely match the original MongoDB documents. - -The Elasticsearch mapping is created when the first document is created. - -Each sync is a "full" sync. -For each MongoDB document discovered: - -* If it does not exist, the document is created in Elasticsearch. -* If it already exists in Elasticsearch, the Elasticsearch document is replaced and the version is incremented. -* If an existing Elasticsearch document no longer exists in the MongoDB collection, it is deleted from Elasticsearch. -* Embedded documents are stored as an `object` field in the parent document. - -This is recursive, because embedded documents can themselves contain embedded documents. - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted -* Permissions are not synced. All documents indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-mongodb-client-sync-rules] -===== Sync rules - -The following sections describe <> for this connector. - -<> are identical for all connectors and are available by default. - -<> for MongoDB can be used to express either `find` queries or aggregation pipelines. -They can also be used to tune options available when issuing these queries/pipelines. - -[discrete#es-connectors-mongodb-client-sync-rules-find] -====== `find` queries - -[NOTE] -==== -You must create a https://www.mongodb.com/docs/current/core/indexes/index-types/index-text/[text index^] on the MongoDB collection in order to perform text searches. 
-==== - -For `find` queries, the structure of this JSON DSL should look like: - -[source,js] ----- -{ - "find":{ - "filter": { - // find query goes here - }, - "options":{ - // query options go here - } - } -} - ----- -// NOTCONSOLE - -For example: - -[source,js] ----- -{ - "find": { - "filter": { - "$text": { - "$search": "garden", - "$caseSensitive": false - } - }, - "skip": 10, - "limit": 1000 - } -} ----- -// NOTCONSOLE - -`find` queries also support additional options, for example the `projection` object: - -[source,js] ----- -{ - "find": { - "filter": { - "languages": [ - "English" - ], - "runtime": { - "$gt":90 - } - }, - "projection":{ - "tomatoes": 1 - } - } -} ----- -// NOTCONSOLE -Where the available options are: - -* `allow_disk_use` (true, false) — When set to true, the server can write temporary data to disk while executing the find operation. This option is only available on MongoDB server versions 4.4 and newer. -* `allow_partial_results` (true, false) — Allows the query to get partial results if some shards are down. -* `batch_size` (Integer) — The number of documents returned in each batch of results from MongoDB. -* `filter` (Object) — The filter criteria for the query. -* `limit` (Integer) — The max number of docs to return from the query. -* `max_time_ms` (Integer) — The maximum amount of time to allow the query to run, in milliseconds. -* `no_cursor_timeout` (true, false) — The server normally times out idle cursors after an inactivity period (10 minutes) to prevent excess memory use. Set this option to prevent that. -* `projection` (Array, Object) — The fields to include or exclude from each doc in the result set. If an array, it should have at least one item. -* `return_key` (true, false) — Return index keys rather than the documents. -* `show_record_id` (true, false) — Return the `$recordId` for each doc in the result set. -* `skip` (Integer) — The number of docs to skip before returning results. - -[discrete#es-connectors-mongodb-client-sync-rules-aggregation] -====== Aggregation pipelines - -Similarly, for aggregation pipelines, the structure of the JSON DSL should look like: - -[source,js] ----- -{ - "aggregate":{ - "pipeline": [ - // pipeline elements go here - ], - "options": { - // pipeline options go here - } - } -} ----- -// NOTCONSOLE - -Where the available options are: - -* `allowDiskUse` (true, false) — Set to true if disk usage is allowed during the aggregation. -* `batchSize` (Integer) — The number of documents to return per batch. -* `bypassDocumentValidation` (true, false) — Whether or not to skip document level validation. -* `collation` (Object) — The collation to use. -* `comment` (String) — A user-provided comment to attach to this command. -* `hint` (String) — The index to use for the aggregation. -* `let` (Object) — Mapping of variables to use in the pipeline. See the server documentation for details. -* `maxTimeMs` (Integer) — The maximum amount of time in milliseconds to allow the aggregation to run. - -[discrete#es-connectors-mongodb-client-migration-from-ruby] -===== Migrating from the Ruby connector framework - -As part of the 8.8.0 release the MongoDB connector was moved from the {connectors-python}[Ruby connectors framework^] to the {connectors-python}[Elastic connector framework^]. - -This change introduces minor formatting modifications to data ingested from MongoDB: - -1. Nested object id field name has changed from "_id" to "id". For example, if you had a field "customer._id", this will now be named "customer.id". -2. 
Date format has changed from `YYYY-MM-DD'T'HH:mm:ss.fff'Z'` to `YYYY-MM-DD'T'HH:mm:ss` - -If your MongoDB connector stopped working after migrating from 8.7.x to 8.8.x, read the workaround outlined in <>. -If that does not work, we recommend deleting the search index attached to this connector and re-creating a MongoDB connector from scratch. - - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-ms-sql.asciidoc b/docs/reference/connector/docs/connectors-ms-sql.asciidoc deleted file mode 100644 index d706af8ca8043..0000000000000 --- a/docs/reference/connector/docs/connectors-ms-sql.asciidoc +++ /dev/null @@ -1,594 +0,0 @@ -[#es-connectors-ms-sql] -=== Elastic Microsoft SQL connector reference -++++ -Microsoft SQL -++++ -// Attributes used in this file: -:service-name: Microsoft SQL -:service-name-stub: mssql - -The _Elastic Microsoft SQL connector_ is a <> for https://learn.microsoft.com/en-us/sql/[Microsoft SQL^] databases. -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. -******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-ms-sql-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-ms-sql-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* in Elastic versions *8.8.0 and later*. -To use this connector natively in Elastic Cloud, satisfy all <>. - -[discrete#es-connectors-{service_type}-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-ms-sql-usage] -===== Usage - -To use this connector as a *managed connector*, use the *Connector* workflow. -See <>. - -Users require the `sysadmin` SQL Server role. -Note that SQL Server Authentication is required. -Windows Authentication is not supported. - -For additional operations, see <>. - -[discrete#es-connectors-ms-sql-compatability] -===== Compatibility - -The following are compatible with Elastic connector frameworks: - -* Microsoft SQL Server versions 2017, 2019 -* Azure SQL -* Amazon RDS for SQL Server - -[discrete#es-connectors-ms-sql-configuration] -===== Configuration - -The following configuration fields are required to set up the connector: - -Host:: -The server host address where the Microsoft SQL Server is hosted. -Default value is `127.0.0.1`. -Examples: -+ -* `192.158.1.38` -* `demo.instance.demo-region.demo.service.com` - -Port:: -The port where the Microsoft SQL Server is hosted. Default value is `1433`. - -Username:: -The username of the account for Microsoft SQL Server (SQL Server Authentication only). - -Password:: -The password of the account to be used for the Microsoft SQL Server (SQL Server Authentication only). - -Database:: -Name of the Microsoft SQL Server database. 
-Examples: -+ -* `employee_database` -* `customer_database` - -Comma-separated list of tables:: -List of tables, separated by commas. -The Microsoft SQL connector will fetch data from all tables present in the configured database, if the value is `*` . -Default value is `*`. -Examples: -+ -* `table_1, table_2` -* `*` -+ -[WARNING] -==== -This field can be bypassed by advanced sync rules. -==== - -Schema:: -Name of the Microsoft SQL Server schema. -Default value is `dbo`. -+ -Examples: -+ -* `dbo` -* `custom_schema` - -Enable SSL:: -Toggle to enable SSL verification. -Default value is `False`. - -SSL certificate:: -Content of SSL certificate. -If SSL is disabled, the `ssl_ca` value will be ignored. -+ -.*Expand* to see an example certificate -[%collapsible] -==== -``` ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -... -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- -``` -==== - -Validate host:: -Toggle to enable host validation. -Default value is `False`. - -[discrete#es-connectors-ms-sql-documents-syncs] -===== Documents and syncs - -* Tables with no primary key defined are skipped. -* If the `last_user_update` of `sys.dm_db_index_usage_stats` table is not available for a specific table and database then all data in that table will be synced. - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. -**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#es-connectors-ms-sql-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. -For more information read <>. - -[discrete#es-connectors-ms-sql-sync-rules-advanced] -====== Advanced sync rules - -This connector supports <> for remote filtering. -These rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -Here are a few examples of advanced sync rules for this connector. - -.*Expand* to see example data -[%collapsible] -==== - -*`employee` table* - -[cols="3*", options="header"] -|=== -| emp_id | name | age -| 3 | John | 28 -| 10 | Jane | 35 -| 14 | Alex | 22 -|=== - -* -*`customer` table* - -[cols="3*", options="header"] -|=== -| c_id | name | age -| 2 | Elm | 24 -| 6 | Pine | 30 -| 9 | Oak | 34 -|=== -==== - -[discrete#es-connectors-ms-sql-sync-rules-advanced-queries] -*Example: Two queries* - -These rules fetch all records from both the `employee` and `customer` tables. The data from these tables will be synced separately to Elasticsearch. - -[source,js] ----- -[ - { - "tables": [ - "employee" - ], - "query": "SELECT * FROM employee" - }, - { - "tables": [ - "customer" - ], - "query": "SELECT * FROM customer" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-ms-sql-sync-rules-example-one-where] -*Example: One WHERE query* - -This rule fetches only the records from the `employee` table where the `emp_id` is greater than 5. Only these filtered records will be synced to Elasticsearch. 
- -[source,js] ----- -[ - { - "tables": ["employee"], - "query": "SELECT * FROM employee WHERE emp_id > 5" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-ms-sql-sync-rules-example-one-join] -*Example: One JOIN query* - -This rule fetches records by performing an INNER JOIN between the `employee` and `customer` tables on the condition that the `emp_id` in `employee` matches the `c_id` in `customer`. The result of this combined data will be synced to Elasticsearch. - -[source,js] ----- -[ - { - "tables": ["employee", "customer"], - "query": "SELECT * FROM employee INNER JOIN customer ON employee.emp_id = customer.c_id" - } -] ----- -// NOTCONSOLE - -[WARNING] -==== -When using advanced rules, a query can bypass the configuration field `tables`. -This will happen if the query specifies a table that doesn't appear in the configuration. -This can also happen if the configuration specifies `*` to fetch all tables while the advanced sync rule requests for only a subset of tables. -==== - -[discrete#es-connectors-ms-sql-known-issues] -===== Known issues - -There are no known issues for this connector. -See <> for any issues affecting all connectors. - -[discrete#es-connectors-ms-sql-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-ms-sql-security] -===== Security - -See <>. - - - - -This connector uses the https://github.com/elastic/connectors/blob/{branch}/connectors/sources/generic_database.py[generic database connector source code^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -View {connectors-python}/connectors/sources/mssql.py[additional code specific to this data source^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-ms-sql-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-ms-sql-client-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. -To use this connector, satisfy all <>. - -[discrete#es-connectors-{service_type}-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-ms-sql-client-usage] -===== Usage - -Users require the `sysadmin` server role. -Note that SQL Server Authentication is required. -Windows Authentication is not supported. - -To use this connector as a *self-managed connector*, see <> -For additional usage operations, see <>. - -[discrete#es-connectors-ms-sql-client-compatability] -===== Compatibility - -The following are compatible with Elastic connector frameworks: - -* Microsoft SQL Server versions 2017, 2019 -* Azure SQL -* Amazon RDS for SQL Server - -[discrete#es-connectors-ms-sql-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/generic_database.py[connector source code^]. -Note that this data source uses the `generic_database.py` connector source code. - -Refer to {connectors-python}/connectors/sources/mssql.py[`mssql.py`^] for additional code, specific to this data source. 
-These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, users will be able to update these values in Kibana. -==== - -The following configuration fields are required to set up the connector: - -`host`:: -The server host address where the Microsoft SQL Server is hosted. -Default value is `127.0.0.1`. -Examples: -+ -* `192.158.1.38` -* `demo.instance.demo-region.demo.service.com` - -`port`:: -The port where the Microsoft SQL Server is hosted. Default value is `9090`. - -`username`:: -The username of the account for Microsoft SQL Server. (SQL Server Authentication only) - -`password`:: -The password of the account to be used for the Microsoft SQL Server. (SQL Server Authentication only) - -`database`:: -Name of the Microsoft SQL Server database. -Examples: -+ -* `employee_database` -* `customer_database` - -`tables`:: -Comma-separated list of tables. -The Microsoft SQL connector will fetch data from all tables present in the configured database, if the value is `*` . -Default value is `*`. -Examples: -+ -* `table_1, table_2` -* `*` -+ -[WARNING] -==== -This field can be bypassed by advanced sync rules. -==== - -`fetch_size`:: -Rows fetched per request. - -`retry_count`:: -The number of retry attempts per failed request. - -`schema`:: -Name of the Microsoft SQL Server schema. -Default value is `dbo`. -+ -Examples: -+ -* `dbo` -* `custom_schema` - -`ssl_enabled`:: -SSL verification enablement. -Default value is `False`. - -`ssl_ca`:: -Content of SSL certificate. -If SSL is disabled, the `ssl_ca` value will be ignored. -+ -.*Expand* to see an example certificate -[%collapsible] -==== -``` ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -... -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- -``` -==== - -`validate_host`:: -Host validation enablement. -Default value is `False`. - -[discrete#es-connectors-ms-sql-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-ms-sql-client-documents-syncs] -===== Documents and syncs - -* Tables with no primary key defined are skipped. -* If the `last_user_update` of `sys.dm_db_index_usage_stats` table is not available for a specific table and database then all data in that table will be synced. - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. -**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#es-connectors-ms-sql-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. -For more information read <>. - -[discrete#es-connectors-ms-sql-client-sync-rules-advanced] -====== Advanced sync rules - -This connector supports <> for remote filtering. -These rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -Here are a few examples of advanced sync rules for this connector. 
- -.*Expand* to see example data -[%collapsible] -==== - -*`employee` table* - -[cols="3*", options="header"] -|=== -| emp_id | name | age -| 3 | John | 28 -| 10 | Jane | 35 -| 14 | Alex | 22 -|=== - -* -*`customer` table* - -[cols="3*", options="header"] -|=== -| c_id | name | age -| 2 | Elm | 24 -| 6 | Pine | 30 -| 9 | Oak | 34 -|=== -==== - -[discrete#es-connectors-ms-sql-client-sync-rules-advanced-queries] -*Example: Two queries* - -These rules fetch all records from both the `employee` and `customer` tables. The data from these tables will be synced separately to Elasticsearch. - -[source,js] ----- -[ - { - "tables": [ - "employee" - ], - "query": "SELECT * FROM employee" - }, - { - "tables": [ - "customer" - ], - "query": "SELECT * FROM customer" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-ms-sql-client-sync-rules-example-one-where] -*Example: One WHERE query* - -This rule fetches only the records from the `employee` table where the `emp_id` is greater than 5. Only these filtered records will be synced to Elasticsearch. - -[source,js] ----- -[ - { - "tables": ["employee"], - "query": "SELECT * FROM employee WHERE emp_id > 5" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-ms-sql-client-sync-rules-example-one-join] -*Example: One JOIN query* - -This rule fetches records by performing an INNER JOIN between the `employee` and `customer` tables on the condition that the `emp_id` in `employee` matches the `c_id` in `customer`. The result of this combined data will be synced to Elasticsearch. - -[source,js] ----- -[ - { - "tables": ["employee", "customer"], - "query": "SELECT * FROM employee INNER JOIN customer ON employee.emp_id = customer.c_id" - } -] ----- -// NOTCONSOLE - -[WARNING] -==== -When using advanced rules, a query can bypass the configuration field `tables`. -This will happen if the query specifies a table that doesn't appear in the configuration. -This can also happen if the configuration specifies `*` to fetch all tables while the advanced sync rule requests for only a subset of tables. -==== - -[discrete#es-connectors-ms-sql-client-client-operations-testing] -===== End-to-end testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the Microsoft SQL connector, run the following command: - -[source,shell] ----- -make ftest NAME=mssql ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=mssql DATA_SIZE=small ----- - -[discrete#es-connectors-ms-sql-client-known-issues] -===== Known issues - -There are no known issues for this connector. -See <> for any issues affecting all connectors. - -[discrete#es-connectors-ms-sql-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-ms-sql-client-security] -===== Security - -See <>. - - - - -This connector uses the https://github.com/elastic/connectors-python/blob/{branch}/connectors/sources/generic_database.py[generic database connector source code^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -View {connectors-python}/connectors/sources/mssql.py[additional code specific to this data source^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). 
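-
-The advanced sync rule examples earlier in this section can also be combined into a single rule. The following sketch joins the sample `employee` and `customer` tables and filters on the `age` column from the example data shown above; it is illustrative only, and the tables and columns should be replaced with ones that exist in your own database.
-
-[source,js]
-----
-[
-  {
-    "tables": ["employee", "customer"],
-    "query": "SELECT * FROM employee INNER JOIN customer ON employee.emp_id = customer.c_id WHERE employee.age > 30"
-  }
-]
-----
-// NOTCONSOLE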
- - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-mysql.asciidoc b/docs/reference/connector/docs/connectors-mysql.asciidoc deleted file mode 100644 index 59a9724f1ffe4..0000000000000 --- a/docs/reference/connector/docs/connectors-mysql.asciidoc +++ /dev/null @@ -1,538 +0,0 @@ -[#es-connectors-mysql] -=== Elastic MySQL connector reference -++++ -MySQL -++++ -// Attributes used in this file: -:service-name: MySQL -:service-name-stub: mysql - -The _Elastic MySQL connector_ is a <> for https://www.mysql.com[MySQL^] data sources. -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. -******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-mysql-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-mysql-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* in Elastic versions *8.5.0 and later*. -To use this connector natively in Elastic Cloud, satisfy all <>. - -This connector has no additional prerequisites beyond the shared requirements, linked above. - -[discrete#es-connectors-mysql-compatibility] -===== Compatibility - -This connector is compatible with *MySQL 5.6 and later*. - -The connector is also compatible with *MariaDB* databases compatible with the above. - -The data source and your Elastic deployment must be able to communicate with each other over a network. - -[discrete#es-connectors-mysql-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-mysql-usage] -===== Usage - -To use this connector natively in Elastic Cloud, see <>. - -For additional operations, see <>. - -[discrete#es-connectors-mysql-configuration] -===== Configuration - -Each time you create an index to be managed by this connector, you will create a new connector configuration. -You will need some or all of the following information about the data source. - -Host:: -The IP address or domain name of the MySQL host, excluding port. -Examples: -+ -* `192.158.1.38` -* `localhost` - -Port:: -The port of the MySQL host. -Examples: -+ -* `3306` -* `3307` - -Username:: -The MySQL username the connector will use. -+ -The user must have access to the configured database. -You may want to create a dedicated, read-only user for each connector. - -Password:: -The MySQL password the connector will use. - -Database:: -The MySQL database to sync. -The database must be accessible using the configured username and password. -+ -Examples: -+ -* `products` -* `orders` - -Comma-separated list of tables:: -The tables in the configured database to sync. -One or more table names, separated by commas. -The tables must be accessible using the configured username and password. 
-+ -Examples: -+ -* `furniture, food, toys` -* `laptops` -+ -[TIP] -==== -This field can be bypassed when using advanced sync rules. -==== - -Enable SSL:: -Whether SSL verification will be enabled. -Default value is `True`. - -SSL Certificate:: -Content of SSL certificate. -If SSL is disabled, the SSL certificate value will be ignored. -+ -.*Expand* to see an example certificate -[%collapsible] -==== -``` ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -AlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHROb2RlMDExFjAUBgNV -BAsTDURlZmF1bHRDZWxsMDExGTAXBgNVBAsTEFJvb3QgQ2VydGlmaWNhdGUxEjAQ -BgNVBAMTCWxvY2FsaG9zdDAeFw0yMTEyMTQyMjA3MTZaFw0yMjEyMTQyMjA3MTZa -MF8xCzAJBgNVBAYTAlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHRO -b2RlMDExFjAUBgNVBAsTDURlZmF1bHRDZWxsMDExEjAQBgNVBAMTCWxvY2FsaG9z -dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMv5HCsJZIpI5zCy+jXV -z6lmzNc9UcVSEEHn86h6zT6pxuY90TYeAhlZ9hZ+SCKn4OQ4GoDRZhLPTkYDt+wW -CV3NTIy9uCGUSJ6xjCKoxClJmgSQdg5m4HzwfY4ofoEZ5iZQ0Zmt62jGRWc0zuxj -hegnM+eO2reBJYu6Ypa9RPJdYJsmn1RNnC74IDY8Y95qn+WZj//UALCpYfX41hko -i7TWD9GKQO8SBmAxhjCDifOxVBokoxYrNdzESl0LXvnzEadeZTd9BfUtTaBHhx6t -njqqCPrbTY+3jAbZFd4RiERPnhLVKMytw5ot506BhPrUtpr2lusbN5svNXjuLeea -MMUCAwEAAaOBoDCBnTATBgNVHSMEDDAKgAhOatpLwvJFqjAdBgNVHSUEFjAUBggr -BgEFBQcDAQYIKwYBBQUHAwIwVAYDVR0RBE0wS4E+UHJvZmlsZVVVSUQ6QXBwU3J2 -MDEtQkFTRS05MDkzMzJjMC1iNmFiLTQ2OTMtYWI5NC01Mjc1ZDI1MmFmNDiCCWxv -Y2FsaG9zdDARBgNVHQ4ECgQITzqhA5sO8O4wDQYJKoZIhvcNAQELBQADggEBAKR0 -gY/BM69S6BDyWp5dxcpmZ9FS783FBbdUXjVtTkQno+oYURDrhCdsfTLYtqUlP4J4 -CHoskP+MwJjRIoKhPVQMv14Q4VC2J9coYXnePhFjE+6MaZbTjq9WaekGrpKkMaQA -iQt5b67jo7y63CZKIo9yBvs7sxODQzDn3wZwyux2vPegXSaTHR/rop/s/mPk3YTS -hQprs/IVtPoWU4/TsDN3gIlrAYGbcs29CAt5q9MfzkMmKsuDkTZD0ry42VjxjAmk -xw23l/k8RoD1wRWaDVbgpjwSzt+kl+vJE/ip2w3h69eEZ9wbo6scRO5lCO2JM4Pr -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- -``` -==== - -[discrete#es-connectors-mysql-known-issues] -===== Known issues - -This connector has the following known issues: - -* *Upgrading from a tech preview connector (8.7 or earlier) to 8.8 will cause the MySQL connector configuration to be invalid.* -+ -MySQL connectors prior to 8.8 can be missing some configuration fields that are required for the connector to run. -If you would like to continue using your MySQL connector after upgrading from 8.7 or earlier, run the script below to fix your connector's configuration. -This will populate the configuration with the missing fields. -The auxilliary information needed for the configuration will then be automatically added by by the self-managed connector. -+ -[source,console] ----- -POST /.elastic-connectors/_update/connector_id -{ - "doc" : { - "configuration": { - "tables": { - "type": "list", - "value": "*" - }, - "ssl_enabled": { - "type": "bool", - "value": false - }, - "ssl_ca": { - "type": "str", - "value": "" - }, - "fetch_size": { - "type": "int", - "value": 50 - }, - "retry_count": { - "type": "int", - "value": 3 - } - } - } -} ----- -// TEST[skip:TODO] -+ -* *Upgrading to 8.8 does not migrate MySQL sync rules.* -+ -After upgrading, you must re-create your sync rules. - -See <> for any issues affecting all connectors. - -[discrete#es-connectors-mysql-syncs] -===== Documents and syncs - -The following describes the default syncing behavior for this connector. -Use <> and {ref}/ingest-pipeline-search.html[ingest pipelines] to customize syncing for specific indices. 
- -All records in the MySQL database included in your connector configuration are extracted and transformed into documents in your Elasticsearch index. - -* For each row in your MySQL database table, the connector creates one *Elasticsearch document*. -* For each column, the connector transforms the column into an *Elasticsearch field*. -* Elasticsearch {ref}/dynamic-mapping.html[dynamically maps^] MySQL data types to *Elasticsearch data types*. -* Tables with no primary key defined are skipped. -* Field values that represent other records are replaced with the primary key for that record (composite primary keys are joined with `_`). - -The Elasticsearch mapping is created when the first document is created. - -Each sync is a "full" sync. - -For each MySQL row discovered: - -* If it does not exist, the document is created in Elasticsearch. -* If it already exists in Elasticsearch, the Elasticsearch document is replaced and the version is incremented. -* If an existing Elasticsearch document no longer exists in the MySQL table, it is deleted from Elasticsearch. - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted -* Permissions are not synced by default. -*All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-mysql-sync-rules] -===== Sync rules - -The following sections describe <> for this connector. - -<> are identical for all connectors and are available by default. - -<> for MySQL can be used to pass arbitrary SQL statements to a MySQL instance. - -[IMPORTANT] -==== -You need to specify the tables used in your custom query in the "tables" field. -==== - -For example: - -[source,js] ----- -[ - { - "tables": ["table1", "table2"], - "query": "SELECT ... FROM ..." - } -] ----- -// NOTCONSOLE - -[WARNING] -==== -When using advanced rules, a query can bypass the configuration field `tables`. -This will happen if the query specifies a table that doesn't appear in the configuration. -This can also happen if the configuration specifies `*` to fetch all tables while the advanced sync rule requests for only a subset of tables. -==== - -[discrete#es-connectors-mysql-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-mysql-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-mysql-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-mysql-client-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* in Elastic versions *8.5.0 and later*. -To use this connector natively in Elastic Cloud, satisfy all <>. - -This connector is also available as a *self-managed connector* from the *Elastic connector framework*. -To use this connector as a self-managed connector, satisfy all <>. - -This connector has no additional prerequisites beyond the shared requirements, linked above. - -[discrete#es-connectors-mysql-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-mysql-client-usage] -===== Usage - -To use this connector as a *managed connector*, use the *Connector* workflow. -See <>. 
- -To use this connector as a *self-managed connector*, see <>. - -For additional operations, see <>. - -[discrete#es-connectors-mysql-client-compatibility] -===== Compatibility - -This connector is compatible with *MySQL 5.6 and later*. - -The connector is also compatible with *MariaDB* databases compatible with the above. - -The data source and your Elastic deployment must be able to communicate with each other over a network. - -[discrete#es-connectors-mysql-client-configuration] -===== Configuration - -Each time you create an index to be managed by this connector, you will create a new connector configuration. -You will need some or all of the following information about the data source. - -Host:: -The IP address or domain name of the MySQL host, excluding port. -Examples: -+ -* `192.158.1.38` -* `localhost` - -Port:: -The port of the MySQL host. -Examples: -+ -* `3306` -* `3307` - -Username:: -The MySQL username the connector will use. -+ -The user must have access to the configured database. -You may want to create a dedicated, read-only user for each connector. - -Password:: -The MySQL password the connector will use. - -Database:: -The MySQL database to sync. -The database must be accessible using the configured username and password. -+ -Examples: -+ -* `products` -* `orders` - -Tables:: -The tables in the configured database to sync. -One or more table names, separated by commas. -The tables must be accessible using the configured username and password. -+ -Examples: -+ -* `furniture, food, toys` -* `laptops` - -Enable SSL:: -Whether SSL verification will be enabled. -Default value is `True`. - -SSL Certificate:: -Content of SSL certificate. -If SSL is disabled, the SSL certificate value will be ignored. -+ -.*Expand* to see an example certificate -[%collapsible] -==== -``` ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -AlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHROb2RlMDExFjAUBgNV -BAsTDURlZmF1bHRDZWxsMDExGTAXBgNVBAsTEFJvb3QgQ2VydGlmaWNhdGUxEjAQ -BgNVBAMTCWxvY2FsaG9zdDAeFw0yMTEyMTQyMjA3MTZaFw0yMjEyMTQyMjA3MTZa -MF8xCzAJBgNVBAYTAlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHRO -b2RlMDExFjAUBgNVBAsTDURlZmF1bHRDZWxsMDExEjAQBgNVBAMTCWxvY2FsaG9z -dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMv5HCsJZIpI5zCy+jXV -z6lmzNc9UcVSEEHn86h6zT6pxuY90TYeAhlZ9hZ+SCKn4OQ4GoDRZhLPTkYDt+wW -CV3NTIy9uCGUSJ6xjCKoxClJmgSQdg5m4HzwfY4ofoEZ5iZQ0Zmt62jGRWc0zuxj -hegnM+eO2reBJYu6Ypa9RPJdYJsmn1RNnC74IDY8Y95qn+WZj//UALCpYfX41hko -i7TWD9GKQO8SBmAxhjCDifOxVBokoxYrNdzESl0LXvnzEadeZTd9BfUtTaBHhx6t -njqqCPrbTY+3jAbZFd4RiERPnhLVKMytw5ot506BhPrUtpr2lusbN5svNXjuLeea -MMUCAwEAAaOBoDCBnTATBgNVHSMEDDAKgAhOatpLwvJFqjAdBgNVHSUEFjAUBggr -BgEFBQcDAQYIKwYBBQUHAwIwVAYDVR0RBE0wS4E+UHJvZmlsZVVVSUQ6QXBwU3J2 -MDEtQkFTRS05MDkzMzJjMC1iNmFiLTQ2OTMtYWI5NC01Mjc1ZDI1MmFmNDiCCWxv -Y2FsaG9zdDARBgNVHQ4ECgQITzqhA5sO8O4wDQYJKoZIhvcNAQELBQADggEBAKR0 -gY/BM69S6BDyWp5dxcpmZ9FS783FBbdUXjVtTkQno+oYURDrhCdsfTLYtqUlP4J4 -CHoskP+MwJjRIoKhPVQMv14Q4VC2J9coYXnePhFjE+6MaZbTjq9WaekGrpKkMaQA -iQt5b67jo7y63CZKIo9yBvs7sxODQzDn3wZwyux2vPegXSaTHR/rop/s/mPk3YTS -hQprs/IVtPoWU4/TsDN3gIlrAYGbcs29CAt5q9MfzkMmKsuDkTZD0ry42VjxjAmk -xw23l/k8RoD1wRWaDVbgpjwSzt+kl+vJE/ip2w3h69eEZ9wbo6scRO5lCO2JM4Pr -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- -``` -==== - -[discrete#es-connectors-mysql-client-known-issues] -===== Known issues - -This connector has the following known issues: - -* *Upgrading from a tech preview connector (8.7 or earlier) to 8.8 will cause the MySQL connector configuration to be invalid.* -+ -MySQL 
connectors prior to 8.8 can be missing some configuration fields that are required for the connector to run.
-If you would like to continue using your MySQL connector after upgrading from 8.7 or earlier, run the script below to fix your connector's configuration.
-This will populate the configuration with the missing fields.
-The auxiliary information needed for the configuration will then be automatically added by the self-managed connector.
-+
-[source,console]
-----
-POST /.elastic-connectors/_update/connector_id
-{
-  "doc" : {
-    "configuration": {
-      "tables": {
-        "type": "list",
-        "value": "*"
-      },
-      "ssl_enabled": {
-        "type": "bool",
-        "value": false
-      },
-      "ssl_ca": {
-        "type": "str",
-        "value": ""
-      },
-      "fetch_size": {
-        "type": "int",
-        "value": 50
-      },
-      "retry_count": {
-        "type": "int",
-        "value": 3
-      }
-    }
-  }
-}
-----
-// TEST[skip:TODO]
-+
-* *Upgrading to 8.8 does not migrate MySQL sync rules.*
-+
-After upgrading, you must re-create your sync rules.
-
-See <> for any issues affecting all connectors.
-
-[discrete#es-connectors-mysql-client-syncs]
-===== Documents and syncs
-
-The following describes the default syncing behavior for this connector.
-Use <> and {ref}/ingest-pipeline-search.html[ingest pipelines] to customize syncing for specific indices.
-
-All records in the MySQL database included in your connector configuration are extracted and transformed into documents in your Elasticsearch index.
-
-* For each row in your MySQL database table, the connector creates one *Elasticsearch document*.
-* For each column, the connector transforms the column into an *Elasticsearch field*.
-* Elasticsearch {ref}/dynamic-mapping.html[dynamically maps^] MySQL data types to *Elasticsearch data types*.
-* Tables with no primary key defined are skipped.
-* Field values that represent other records are replaced with the primary key for that record (composite primary keys are joined with `_`).
-
-The Elasticsearch mapping is created when the first document is created.
-
-Each sync is a "full" sync.
-
-For each MySQL row discovered:
-
-* If it does not exist, the document is created in Elasticsearch.
-* If it already exists in Elasticsearch, the Elasticsearch document is replaced and the version is incremented.
-* If an existing Elasticsearch document no longer exists in the MySQL table, it is deleted from Elasticsearch.
-
-[discrete#es-connectors-mysql-client-docker]
-===== Deployment using Docker
-
-include::_connectors-docker-instructions.asciidoc[]
-
-[discrete#es-connectors-mysql-client-sync-rules]
-===== Sync rules
-
-The following sections describe <> for this connector.
-
-<> are identical for all connectors and are available by default.
-
-<> for MySQL can be used to pass arbitrary SQL statements to a MySQL instance.
-
-[IMPORTANT]
-====
-You need to specify the tables used in your custom query in the "tables" field.
-====
-
-For example:
-
-[source,js]
-----
-[
-  {
-    "tables": ["table1", "table2"],
-    "query": "SELECT ... FROM ..."
-  }
-]
-----
-// NOTCONSOLE
-
-A more concrete example, using hypothetical table and column names, is shown at the end of this section.
-
-[WARNING]
-====
-When using advanced rules, a query can bypass the configuration field `tables`.
-This will happen if the query specifies a table that doesn't appear in the configuration.
-This can also happen if the configuration specifies `*` to fetch all tables while the advanced sync rule requests for only a subset of tables.
-====
-
-[discrete#es-connectors-mysql-client-troubleshooting]
-===== Troubleshooting
-
-See <>.
-
-[discrete#es-connectors-mysql-client-security]
-===== Security
-
-See <>.
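-
-As a concrete illustration of the advanced sync rule format described in the sync rules section above, the following hypothetical rule would sync only the shipped rows of an `orders` table. The table and column names are illustrative placeholders, not defaults shipped with the connector; substitute the tables and SQL that exist in your own database.
-
-[source,js]
-----
-[
-  {
-    "tables": ["orders"],
-    "query": "SELECT id, status, total FROM orders WHERE status = 'shipped'"
-  }
-]
-----
-// NOTCONSOLE
-
-As required by the note above, the `orders` table referenced in the query is also listed in the rule's `tables` field.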
- -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-network-drive.asciidoc b/docs/reference/connector/docs/connectors-network-drive.asciidoc deleted file mode 100644 index 909e3440c9f02..0000000000000 --- a/docs/reference/connector/docs/connectors-network-drive.asciidoc +++ /dev/null @@ -1,533 +0,0 @@ -[#es-connectors-network-drive] -=== Elastic network drive connector reference -++++ -Network drive -++++ - -// Attributes used in this file: -:service-name: Network drive -:service-name-stub: network_drive - -The _Elastic network drive connector_ is a <> for network drive data sources. -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. -******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-network-drive-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-network-drive-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* on Elastic Cloud, as of *8.9.1*. - -To use this connector natively in Elastic Cloud, satisfy all <>. - -[discrete#es-connectors-network-drive-usage] -===== Usage - -To use this connector natively in Elastic Cloud, see <>. - -[discrete#es-connectors-network-drive-configuration] -===== Configuration - -The following configuration fields are required to set up the connector: - -Username:: -The username of the account for the network drive. -The user must have at least **read** permissions for the folder path provided. - -Password:: -The password of the account to be used for crawling the network drive. - -IP address:: -The server IP address where the network drive is hosted. -Default value is `127.0.0.1`. - -Port:: -The server port where the network drive service is available. -Default value is `445`. - -Path:: -** The network drive path the connector will crawl to fetch files. -This is the name of the folder shared via SMB. -The connector uses the Python https://github.com/jborean93/smbprotocol[`smbprotocol`^] library which supports both *SMB v2* and *v3*. -** Accepts only one path— parent folders can be specified to widen the scope. -** The drive path should use *forward slashes* as path separators. -Example: -+ -* `admin/bin` - -Enable document level security:: -Toggle to enable document level security (DLS). When enabled: -* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -* Access control syncs will fetch users' access control lists and store them in a separate index. -Default value is `false`. -+ -[TIP] -==== -Refer to <> for more information, including prerequisites and limitations. -==== - -Identity mappings:: -Path to a CSV file containing user and group SIDs (For Linux Network Drive). 
-+ -File should be formatted as follows: -+ -* Fields separated by semicolons (`;`) -* Three fields per line: `Username;User-SID;Group-SIDs` -* Group-SIDs are comma-separated and optional. -+ -*Example* with one username, user-sid and no group: -+ -[source,text] ----- -user1;S-1; ----- -+ -*Example* with one username, user-sid and two groups: -+ -[source,text] ----- -user1;S-1;S-11,S-22 ----- - -[discrete#es-connectors-network-drive-documents-syncs] -===== Documents and syncs - -The connector syncs folders as separate documents in Elasticsearch. -The following fields will be added for the document type `folder`: - -* `create_time` -* `title` -* `path` -* `modified` -* `time` -* `id` - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permissions are not synced by default. -You must first enable <>. -Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-network-drive-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-network-drive-dls] -===== Document level security - -Document Level Security (DLS) enables you to restrict access to documents based on a user's permissions. -DLS facilitates the syncing of folder and file permissions, including both user and group level permissions. - -[NOTE] -==== -**Note:** Refer to <> to learn how to search data with DLS enabled, when building a search application. -==== - -[discrete#es-connectors-network-drive-dls-availability] -====== Availability - -* The present version of Network Drive connector offers DLS support for Windows network drives only. -* To fetch users and groups in a Windows network drive, account credentials added in the connector configuration should have access to the Powershell of the Windows Server where the network drive is hosted. - -[discrete#es-connectors-network-drive-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-network-drive-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -Advanced sync rules are defined through a source-specific DSL JSON snippet. -Advanced sync rules for this connector use *glob patterns*. - -1. Each rule must contains a glob pattern. This pattern is then matched against all the available folder paths inside the configured drive path. -2. The pattern must begin with the `drive_path` field configured in the connector. -3. If the pattern matches any available folder paths, the contents directly within those folders will be fetched. - -The following sections provide examples of advanced sync rules for this connector. 
- -[discrete#es-connectors-network-drive-indexing-files-and-folders-recursively-within-folders] -*Indexing files and folders recursively within folders* - -[source,js] ----- -[ - { - "pattern": "Folder-shared/a/mock/**" - }, - { - "pattern": "Folder-shared/b/alpha/**" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-network-drive-indexing-files-and-folders-directly-inside-folder] -*Indexing files and folders directly inside folder* - -[source,js] ----- -[ - { - "pattern": "Folder-shared/a/b/test" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-network-drive-indexing-files-and-folders-directly-inside-a-set-of-folders] -*Indexing files and folders directly inside a set of folders* - -[source,js] ----- -[ - { - "pattern": "Folder-shared/org/*/all-tests/test[135]" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-network-drive-excluding-files-and-folders-that-match-a-pattern] -*Excluding files and folders that match a pattern* - -[source,js] ----- -[ - { - "pattern": "Folder-shared/**/all-tests/test[!7]" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-network-drive-content-extraction] -===== Content extraction - -See <>. - -[discrete#es-connectors-network-drive-known-issues] -===== Known issues - -There are no known issues for this connector. - -See <> for any issues affecting all connectors. - -[discrete#es-connectors-network-drive-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-network-drive-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-network-drive-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-network-drive-client-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. -This self-managed connector is compatible with Elastic versions *8.6.0+*. - -To use this connector, satisfy all <>. - -[discrete#es-connectors-network-drive-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, see <>For additional usage operations, see <>. - -[discrete#es-connectors-network-drive-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/network_drive.py[connector source code^]. -These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required to set up the connector: - -`username`:: -The username of the account for the network drive. -The user must have at least **read** permissions for the folder path provided. - -`password`:: -The password of the account to be used for crawling the network drive. - -`server_ip`:: -The server IP address where the network drive is hosted. -Default value is `127.0.0.1`. - -`server_port`:: -The server port where the network drive service is available. -Default value is `445`. - -`drive_path`:: -** The network drive path the connector will crawl to fetch files. -This is the name of the folder shared via SMB. 
-The connector uses the Python https://github.com/jborean93/smbprotocol[`smbprotocol`^] library which supports both *SMB v2* and *v3*. -** Accepts only one path— parent folders can be specified to widen the scope. -** The drive path should use *forward slashes* as path separators. -Example: -+ -* `admin/bin` - -`use_document_level_security`:: -Toggle to enable document level security (DLS). When enabled: -* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -* Access control syncs will fetch users' access control lists and store them in a separate index. -+ -[TIP] -==== -Refer to <> for more information, including prerequisites and limitations. -==== - -`drive_type`:: -The type of network drive to be crawled. -The following options are available: -+ -* `Windows` -* `Linux` - -`identity_mappings`:: -Path to a CSV file containing user and group SIDs (For Linux Network Drive). -+ -File should be formatted as follows: -+ -* Fields separated by semicolons (`;`) -* Three fields per line: `Username;User-SID;Group-SIDs` -* Group-SIDs are comma-separated and optional. -+ -*Example* with one username, user-sid and no group: -+ -[source,text] ----- -user1;S-1; ----- -+ -*Example* with one username, user-sid and two groups: -+ -[source,text] ----- -user1;S-1;S-11,S-22 ----- - -[discrete#es-connectors-network-drive-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-network-drive-client-documents-syncs] -===== Documents and syncs - -The connector syncs folders as separate documents in Elasticsearch. -The following fields will be added for the document type `folder`: - -* `create_time` -* `title` -* `path` -* `modified` -* `time` -* `id` - - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted -* Permissions are not synced by default. -You must first enable <>. -Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-network-drive-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-network-drive-client-dls] -===== Document level security - -Document Level Security (DLS) enables you to restrict access to documents based on a user's permissions. -DLS facilitates the syncing of folder and file permissions, including both user and group level permissions. - -[NOTE] -==== -**Note:** Refer to <> to learn how to search data with DLS enabled, when building a search application. -==== - -[discrete#es-connectors-network-drive-client-dls-availability] -====== Availability - -* The Network Drive self-managed connector offers DLS support for both Windows and Linux network drives. -* To fetch users and groups in a Windows network drive, account credentials added in the connector configuration should have access to the Powershell of the Windows Server where the network drive is hosted. - -[discrete#es-connectors-network-drive-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-network-drive-client-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -Advanced sync rules are defined through a source-specific DSL JSON snippet. -Advanced sync rules for this connector use *glob patterns*. - -1. 
Each rule must contains a glob pattern. This pattern is then matched against all the available folder paths inside the configured drive path. -2. The pattern must begin with the `drive_path` field configured in the connector. -3. If the pattern matches any available folder paths, the contents directly within those folders will be fetched. - -The following sections provide examples of advanced sync rules for this connector. - -[discrete#es-connectors-network-drive-client-indexing-files-and-folders-recursively-within-folders] -*Indexing files and folders recursively within folders* - -[source,js] ----- -[ - { - "pattern": "Folder-shared/a/mock/**" - }, - { - "pattern": "Folder-shared/b/alpha/**" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-network-drive-client-indexing-files-and-folders-directly-inside-folder] -*Indexing files and folders directly inside folder* - -[source,js] ----- -[ - { - "pattern": "Folder-shared/a/b/test" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-network-drive-client-indexing-files-and-folders-directly-inside-a-set-of-folders] -*Indexing files and folders directly inside a set of folders* - -[source,js] ----- -[ - { - "pattern": "Folder-shared/org/*/all-tests/test[135]" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-network-drive-client-excluding-files-and-folders-that-match-a-pattern] -*Excluding files and folders that match a pattern* - -[source,js] ----- -[ - { - "pattern": "Folder-shared/**/all-tests/test[!7]" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-network-drive-client-content-extraction] -===== Content extraction - -See <>. - -[discrete#es-connectors-network-drive-client-tests] -===== End-to-end tests - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To execute a functional test for the Network Drive self-managed connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=network_drive ----- - -By default, this will use a medium-sized dataset. -For faster tests add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=network_drive DATA_SIZE=small ----- - -[discrete#es-connectors-network-drive-client-known-issues] -===== Known issues - -There are no known issues for this connector. - -See <> for any issues affecting all connectors. - -[discrete#es-connectors-network-drive-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-network-drive-client-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-notion.asciidoc b/docs/reference/connector/docs/connectors-notion.asciidoc deleted file mode 100644 index 7c08c5d81e032..0000000000000 --- a/docs/reference/connector/docs/connectors-notion.asciidoc +++ /dev/null @@ -1,747 +0,0 @@ -[#es-connectors-notion] -=== Elastic Notion Connector reference -++++ -Notion -++++ - -// Attributes (AKA variables) used in this file -:service-name: Notion -:service-name-stub: notion - -The Notion connector is written in Python using the {connectors-python}[Elastic connector framework^]. -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). 
- -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-notion-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-notion-connector-availability-and-prerequisites] -===== Availability and prerequisites - -This managed connector was introduced in Elastic *8.14.0* as a managed service on Elastic Cloud. - -To use this connector natively in Elastic Cloud, satisfy all <>. - -[NOTE] -==== -This connector is in *beta* and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Beta features are not subject to the support SLA of official GA features. -==== - -[discrete#es-connectors-notion-connector-usage] -===== Usage - -To use this connector in the UI, select the *Notion* tile when creating a new connector under *Search -> Connectors*. - -If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs]. - -For additional operations, see <>. - -[discrete#es-connectors-notion-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-notion-connector-connecting-to-notion] -===== Connecting to Notion - -To connect to Notion, the user needs to https://www.notion.so/help/create-integrations-with-the-notion-api#create-an-internal-integration[create an internal integration] for their Notion workspace, which can access resources using the Internal Integration Secret Token. Configure the Integration with following settings: - -1. Users must grant `READ` permission for content, comment and user capabilities for that integration from the Capabilities tab. - -2. Users must manually https://www.notion.so/help/add-and-manage-connections-with-the-api#add-connections-to-pages[add the integration as a connection] to the top-level pages in a workspace. Sub-pages will inherit the connections of the parent page automatically. - -[discrete#es-connectors-notion-connector-configuration] -===== Configuration - -Note the following configuration fields: - -`Notion Secret Key`(required):: -Secret token assigned to your integration, for a particular workspace. Example: - -* `zyx-123453-12a2-100a-1123-93fd09d67394` - -`Databases`(required):: -Comma-separated list of database names to be fetched by the connector. If the value is `*`, connector will fetch all the databases available in the workspace. Example: - -* `database1, database2` -* `*` - -`Pages`(required):: -Comma-separated list of page names to be fetched by the connector. If the value is `*`, connector will fetch all the pages available in the workspace. Examples: - -* `*` -* `Page1, Page2` - -`Index Comments`:: - -Toggle to enable fetching and indexing of comments from the Notion workspace for the configured pages, databases and the corresponding child blocks. Default value is `False`. - -[NOTE] -==== -Enabling comment indexing could impact connector performance due to increased network calls. Therefore, by default this value is `False`. -==== - -[discrete#es-connectors-notion-connector-content-extraction] -====== Content Extraction - -Refer to <>. 
- -[discrete#es-connectors-notion-connector-documents-and-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Pages* -** Includes metadata such as `page name`, `id`, `last updated time`, etc. -* *Blocks* -** Includes metadata such as `title`, `type`, `id`, `content` (in case of file block), etc. -* *Databases* -** Includes metadata such as `name`, `id`, `records`, `size`, etc. -* *Users* -** Includes metadata such as `name`, `id`, `email address`, etc. -* *Comments* -** Includes the content and metadata such as `id`, `last updated time`, `created by`, etc. -** *Note*: Comments are excluded by default. - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to the relevant Elasticsearch index. -==== - -[discrete#es-connectors-notion-connector-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-notion-connector-advanced-sync-rules] -===== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -The following section describes *advanced sync rules* for this connector, to filter data in Notion _before_ indexing into {es}. -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -Advanced sync rules for Notion take the following parameters: - -1. `searches`: Notion's search filter to search by title. -2. `query`: Notion's database query filter to fetch a specific database. - -[discrete#es-connectors-notion-connector-advanced-sync-rules-examples] -====== Examples - -[discrete] -*Example 1* - -Indexing every page where the title contains `Demo Page`: - -[source,js] ----- - { - "searches": [ - { - "filter": { - "value": "page" - }, - "query": "Demo Page" - } - ] - } ----- -// NOTCONSOLE - -[discrete] -*Example 2* - -Indexing every database where the title contains `Demo Database`: - -[source,js] ----- -{ - "searches": [ - { - "filter": { - "value": "database" - }, - "query": "Demo Database" - } - ] -} ----- -// NOTCONSOLE - -[discrete] -*Example 3* - -Indexing every database where the title contains `Demo Database` and every page where the title contains `Demo Page`: - -[source,js] ----- -{ - "searches": [ - { - "filter": { - "value": "database" - }, - "query": "Demo Database" - }, - { - "filter": { - "value": "page" - }, - "query": "Demo Page" - } - ] -} ----- -// NOTCONSOLE - -[discrete] -*Example 4* - -Indexing all pages in the workspace: - -[source,js] ----- -{ - "searches": [ - { - "filter": { - "value": "page" - }, - "query": "" - } - ] -} ----- -// NOTCONSOLE - -[discrete] -*Example 5* - -Indexing all the pages and databases connected to the workspace: - -[source,js] ----- -{ - "searches":[ - { - "query":"" - } - ] -} ----- -// NOTCONSOLE - -[discrete] -*Example 6* - -Indexing all the rows of a database where the record is `true` for the column `Task completed` and its property(datatype) is a checkbox: - -[source,js] ----- -{ - "database_query_filters": [ - { - "filter": { - "property": "Task completed", - "checkbox": { - "equals": true - } - }, - "database_id": "database_id" - } - ] -} ----- -// NOTCONSOLE - -[discrete] -*Example 7* - -Indexing all rows of a specific database: - -[source,js] ----- -{ - "database_query_filters": [ - { - "database_id": "database_id" - } - ] -} ----- -// NOTCONSOLE - -[discrete] -*Example 8* - -Indexing all blocks defined in `searches` and 
`database_query_filters`: - -[source,js] ----- -{ - "searches":[ - { - "query":"External tasks", - "filter":{ - "value":"database" - } - }, - { - "query":"External tasks", - "filter":{ - "value":"page" - } - } - ], - "database_query_filters":[ - { - "database_id":"notion_database_id1", - "filter":{ - "property":"Task completed", - "checkbox":{ - "equals":true - } - } - } - ] -} ----- -// NOTCONSOLE - -[NOTE] -==== -In this example the `filter` object syntax for `database_query_filters` is defined per the https://developers.notion.com/reference/post-database-query-filter[Notion documentation]. -==== - -[discrete#es-connectors-notion-connector-known-issues] -===== Known issues - -* *Updates to new pages may not be reflected immediately in the Notion API.* -+ -This could lead to these pages not being indexed by the connector, if a sync is initiated immediately after their addition. -To ensure all pages are indexed, initiate syncs a few minutes after adding pages to Notion. - -* *Notion's Public API does not support linked databases.* -+ -Linked databases in Notion are copies of a database that can be filtered, sorted, and viewed differently. -To fetch the information in a linked database, you need to target the original *source* database. -For more details refer to the https://developers.notion.com/docs/working-with-databases#linked-databases[Notion documentation]. - -* *Documents' `properties` objects are serialized as strings under `details`*. -+ -Notion's schema for `properties` is not consistent, and can lead to `document_parsing_exceptions` if indexed to Elasticsearch as an object. -For this reason, the `properties` object is instead serialized as a JSON string, and stored under the `details` field. -If you need to search a sub-object from `properties`, you may need to post-process the `details` field in an ingest pipeline to extract your desired subfield(s). - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-notion-connector-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-notion-connector-security] -===== Security - -See <>. - - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-notion-connector-client-reference] -==== *Self-managed connector reference* - -.View *self-managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-notion-client-connector-availability-and-prerequisites] -===== Availability and prerequisites - -This connector was introduced in Elastic *8.13.0*, available as a *self-managed* self-managed connector. - -To use this connector, satisfy all <>. -Importantly, you must deploy the connectors service on your own infrastructure. -You have two deployment options: - -* <>. Use this option if you're comfortable working with Python and want to iterate quickly locally. -* <>. Use this option if you want to deploy the connectors to a server, or use a container orchestration platform. - -[NOTE] -==== -This connector is in *beta* and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Beta features are not subject to the support SLA of official GA features. 
-==== - -[discrete#es-connectors-notion-client-connector-usage] -===== Usage - -To use this connector in the UI, select the *Notion* tile when creating a new connector under *Search -> Connectors*. - -For additional operations, see <>. - -[discrete#es-connectors-notion-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-notion-client-connector-connecting-to-notion] -===== Connecting to Notion - -To connect to Notion, the user needs to https://www.notion.so/help/create-integrations-with-the-notion-api#create-an-internal-integration[create an internal integration] for their Notion workspace, which can access resources using the Internal Integration Secret Token. Configure the Integration with following settings: - -1. Users must grant `READ` permission for content, comment and user capabilities for that integration from the Capabilities tab. - -2. Users must manually https://www.notion.so/help/add-and-manage-connections-with-the-api#add-connections-to-pages[add the integration as a connection] to the top-level pages in a workspace. Sub-pages will inherit the connections of the parent page automatically. - -[discrete#es-connectors-notion-client-connector-docker] -===== Deploy with Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-notion-client-connector-configuration] -===== Configuration - -Note the following configuration fields: - -`Notion Secret Key`(required):: -Secret token assigned to your integration, for a particular workspace. Example: - -* `zyx-123453-12a2-100a-1123-93fd09d67394` - -`Databases`(required):: -Comma-separated list of database names to be fetched by the connector. If the value is `*`, connector will fetch all the databases available in the workspace. Example: - -* `database1, database2` -* `*` - -`Pages`(required):: -Comma-separated list of page names to be fetched by the connector. If the value is `*`, connector will fetch all the pages available in the workspace. Examples: - -* `*` -* `Page1, Page2` - -`Index Comments`:: - -Toggle to enable fetching and indexing of comments from the Notion workspace for the configured pages, databases and the corresponding child blocks. Default value is `False`. - -[NOTE] -==== -Enabling comment indexing could impact connector performance due to increased network calls. Therefore, by default this value is `False`. -==== - -[discrete#es-connectors-notion-client-connector-content-extraction] -====== Content Extraction - -Refer to <>. - -[discrete#es-connectors-notion-client-connector-documents-and-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Pages* -** Includes metadata such as `page name`, `id`, `last updated time`, etc. -* *Blocks* -** Includes metadata such as `title`, `type`, `id`, `content` (in case of file block), etc. -* *Databases* -** Includes metadata such as `name`, `id`, `records`, `size`, etc. -* *Users* -** Includes metadata such as `name`, `id`, `email address`, etc. -* *Comments* -** Includes the content and metadata such as `id`, `last updated time`, `created by`, etc. -** *Note*: Comments are excluded by default. - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to the relevant Elasticsearch index. 
-==== - -[discrete#es-connectors-notion-client-connector-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-notion-client-connector-advanced-sync-rules] -===== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -The following section describes *advanced sync rules* for this connector, to filter data in Notion _before_ indexing into {es}. -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -Advanced sync rules for Notion take the following parameters: - -1. `searches`: Notion's search filter to search by title. -2. `query`: Notion's database query filter to fetch a specific database. - -[discrete#es-connectors-notion-client-connector-advanced-sync-rules-examples] -====== Examples - -[discrete] -*Example 1* - -Indexing every page where the title contains `Demo Page`: - -[source,js] ----- - { - "searches": [ - { - "filter": { - "value": "page" - }, - "query": "Demo Page" - } - ] - } ----- -// NOTCONSOLE - -[discrete] -*Example 2* - -Indexing every database where the title contains `Demo Database`: - -[source,js] ----- -{ - "searches": [ - { - "filter": { - "value": "database" - }, - "query": "Demo Database" - } - ] -} ----- -// NOTCONSOLE - -[discrete] -*Example 3* - -Indexing every database where the title contains `Demo Database` and every page where the title contains `Demo Page`: - -[source,js] ----- -{ - "searches": [ - { - "filter": { - "value": "database" - }, - "query": "Demo Database" - }, - { - "filter": { - "value": "page" - }, - "query": "Demo Page" - } - ] -} ----- -// NOTCONSOLE - -[discrete] -*Example 4* - -Indexing all pages in the workspace: - -[source,js] ----- -{ - "searches": [ - { - "filter": { - "value": "page" - }, - "query": "" - } - ] -} ----- -// NOTCONSOLE - -[discrete] -*Example 5* - -Indexing all the pages and databases connected to the workspace: - -[source,js] ----- -{ - "searches":[ - { - "query":"" - } - ] -} ----- -// NOTCONSOLE - -[discrete] -*Example 6* - -Indexing all the rows of a database where the record is `true` for the column `Task completed` and its property(datatype) is a checkbox: - -[source,js] ----- -{ - "database_query_filters": [ - { - "filter": { - "property": "Task completed", - "checkbox": { - "equals": true - } - }, - "database_id": "database_id" - } - ] -} ----- -// NOTCONSOLE - -[discrete] -*Example 7* - -Indexing all rows of a specific database: - -[source,js] ----- -{ - "database_query_filters": [ - { - "database_id": "database_id" - } - ] -} ----- -// NOTCONSOLE - -[discrete] -*Example 8* - -Indexing all blocks defined in `searches` and `database_query_filters`: - -[source,js] ----- -{ - "searches":[ - { - "query":"External tasks", - "filter":{ - "value":"database" - } - }, - { - "query":"External tasks", - "filter":{ - "value":"page" - } - } - ], - "database_query_filters":[ - { - "database_id":"notion_database_id1", - "filter":{ - "property":"Task completed", - "checkbox":{ - "equals":true - } - } - } - ] -} ----- -// NOTCONSOLE - -[NOTE] -==== -In this example the `filter` object syntax for `database_query_filters` is defined per the https://developers.notion.com/reference/post-database-query-filter[Notion documentation]. 
-==== - -[discrete#es-connectors-notion-client-connector-connector-client-operations] -===== Connector Client operations - -[discrete#es-connectors-notion-client-connector-end-to-end-testing] -====== End-to-end Testing - -The connector framework enables operators to run functional tests against a real data source, using Docker Compose. -You don't need a running Elasticsearch instance or Notion source to run this test. - -Refer to <> for more details. - -To perform E2E testing for the Notion connector, run the following command: - - -[source,shell] ----- -$ make ftest NAME=notion ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=notion DATA_SIZE=small ----- - -By default, `DATA_SIZE=MEDIUM`. - -[discrete#es-connectors-notion-client-connector-known-issues] -===== Known issues - -* *Updates to new pages may not be reflected immediately in the Notion API.* -+ -This could lead to these pages not being indexed by the connector, if a sync is initiated immediately after their addition. -To ensure all pages are indexed, initiate syncs a few minutes after adding pages to Notion. - -* *Notion's Public API does not support linked databases.* -+ -Linked databases in Notion are copies of a database that can be filtered, sorted, and viewed differently. -To fetch the information in a linked database, you need to target the original *source* database. -For more details refer to the https://developers.notion.com/docs/working-with-databases#linked-databases[Notion documentation]. - -* *Documents' `properties` objects are serialized as strings under `details`*. -+ -Notion's schema for `properties` is not consistent, and can lead to `document_parsing_exceptions` if indexed to Elasticsearch as an object. -For this reason, the `properties` object is instead serialized as a JSON string, and stored under the `details` field. -If you need to search a sub-object from `properties`, you may need to post-process the `details` field in an ingest pipeline to extract your desired subfield(s). - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-notion-client-connector-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-notion-client-connector-security] -===== Security - -See <>. - - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-onedrive.asciidoc b/docs/reference/connector/docs/connectors-onedrive.asciidoc deleted file mode 100644 index 44ac96e2ad99d..0000000000000 --- a/docs/reference/connector/docs/connectors-onedrive.asciidoc +++ /dev/null @@ -1,604 +0,0 @@ -[#es-connectors-onedrive] -=== Elastic OneDrive connector reference -++++ -OneDrive -++++ -// Attributes used in this file -:service-name: OneDrive -:service-name-stub: onedrive - -The _Elastic OneDrive connector_ is a <> for OneDrive. -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. 
-******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-onedrive-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-onedrive-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* as of Elastic version *8.11.0*. - -To use this connector natively in Elastic Cloud, satisfy all <>. - -[discrete#es-connectors-onedrive-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-onedrive-usage] -===== Usage - -To use this connector natively in Elastic Cloud, see <>. - -For additional operations, see <>. - -[discrete#es-connectors-onedrive-usage-connection] -====== Connecting to OneDrive - -To connect to OneDrive you need to https://learn.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal[create an Azure Active Directory application and service principal] that can access resources. - -Follow these steps: - -. Go to the https://portal.azure.com[Azure portal] and sign in with your Azure account. -. Navigate to the *Azure Active Directory* service. -. Select *App registrations* from the left-hand menu. -. Click on the *New registration* button to register a new application. -. Provide a name for your app, and optionally select the supported account types (e.g., single tenant, multi-tenant). -. Click on the *Register* button to create the app registration. -. After the registration is complete, you will be redirected to the app's overview page. Take note of the *Application (client) ID* value, as you'll need it later. -. Scroll down to the *API permissions* section and click on the *Add a permission* button. -. In the *Request API permissions* pane, select *Microsoft Graph* as the API. -. Choose the application permissions and select the following permissions under the *Application* tab: `User.Read.All`, `File.Read.All` -. Click on the *Add permissions* button to add the selected permissions to your app. -Finally, click on the *Grant admin consent* button to grant the required permissions to the app. This step requires administrative privileges. **NOTE**: If you are not an admin, you need to request the Admin to grant consent via their Azure Portal. -. Click on *Certificates & Secrets* tab. Go to Client Secrets. Generate a new client secret and keep a note of the string present under `Value` column. - -[discrete#es-connectors-onedrive-usage-configuration] -===== Configuration - -The following configuration fields are *required*: - -Azure application Client ID:: -Unique identifier for your Azure Application, found on the app's overview page. Example: -* `ab123453-12a2-100a-1123-93fd09d67394` - -Azure application Client Secret:: -String value that the application uses to prove its identity when requesting a token, available under the `Certificates & Secrets` tab of your Azure application menu. Example: -* `eyav1~12aBadIg6SL-STDfg102eBfCGkbKBq_Ddyu` - -Azure application Tenant ID:: -Unique identifier of your Azure Active Directory instance. Example: -* `123a1b23-12a3-45b6-7c8d-fc931cfb448d` - -Enable document level security:: -Toggle to enable <>. 
-When enabled: -* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -* Access control syncs will fetch users' access control lists and store them in a separate index. - -[WARNING] -==== -Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. -This impacts the speed at which your content can be retrieved. -==== - -[discrete#es-connectors-onedrive-usage-content-extraction] -===== Content Extraction - -Refer to <> for more details. - -[discrete#es-connectors-onedrive-documents-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Files* -** Includes metadata such as file name, path, size, content, etc. -* *Folders* - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permissions are not synced by default. -You must first enable <>. -Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-onedrive-connectors-onedrive-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-onedrive-dls] -===== Document level security - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. This feature is available by default for the OneDrive connector. -See <> for how to enable DLS for this connector. - -Refer to <> for more details about this feature. - -[NOTE] -==== -Refer to <> to learn how to ingest data with DLS enabled, when building a search application. -==== - -[discrete#es-connectors-onedrive-documents-sync-rules] -===== Sync rules - -_Basic_ sync rules are identical for all connectors and are available by default. -For more information read <>. - -[discrete#es-connectors-onedrive-sync-rules-advanced] -====== Advanced sync rules - -This connector supports <> for remote filtering. -These rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -Here are a few examples of advanced sync rules for this connector. - -[discrete#es-connectors-onedrive-sync-rules-advanced-examples-1] -*Example 1* - -This rule skips indexing for files with `.xlsx` and `.docx` extensions. -All other files and folders will be indexed. - -[source,js] ----- -[ - { - "skipFilesWithExtensions": [".xlsx" , ".docx"] - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-onedrive-sync-rules-advanced-examples-2] -*Example 2* - -This rule focuses on indexing files and folders owned by `user1-domain@onmicrosoft.com` and `user2-domain@onmicrosoft.com` but excludes files with `.py` extension. - -[source,js] ----- -[ - { - "owners": ["user1-domain@onmicrosoft.com", "user2-domain@onmicrosoft.com"], - "skipFilesWithExtensions": [".py"] - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-onedrive-sync-rules-advanced-examples-3] -*Example 3* - -This rule indexes only the files and folders directly inside the root folder, excluding any `.md` files. 
- -[source,js] ----- -[ - { - "skipFilesWithExtensions": [".md"], - "parentPathPattern": "/drive/root:" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-onedrive-sync-rules-advanced-examples-4] -*Example 4* - -This rule indexes files and folders owned by `user1-domain@onmicrosoft.com` and `user3-domain@onmicrosoft.com` that are directly inside the `abc` folder, which is a subfolder of any folder under the `hello` directory in the root. Files with extensions `.pdf` and `.py` are excluded. - -[source,js] ----- -[ - { - "owners": ["user1-domain@onmicrosoft.com", "user3-domain@onmicrosoft.com"], - "skipFilesWithExtensions": [".pdf", ".py"], - "parentPathPattern": "/drive/root:/hello/**/abc" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-onedrive-sync-rules-advanced-examples-5] -*Example 5* - -This example contains two rules. -The first rule indexes all files and folders owned by `user1-domain@onmicrosoft.com` and `user2-domain@onmicrosoft.com`. -The second rule indexes files for all other users, but skips files with a `.py` extension. - -[source,js] ----- -[ - { - "owners": ["user1-domain@onmicrosoft.com", "user2-domain@onmicrosoft.com"] - }, - { - "skipFilesWithExtensions": [".py"] - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-onedrive-sync-rules-advanced-examples-6] -*Example 6* - -This example contains two rules. -The first rule indexes all files owned by `user1-domain@onmicrosoft.com` and `user2-domain@onmicrosoft.com`, excluding `.md` files. -The second rule indexes files and folders recursively inside the `abc` folder. - -[source,js] ----- -[ - { - "owners": ["user1-domain@onmicrosoft.com", "user2-domain@onmicrosoft.com"], - "skipFilesWithExtensions": [".md"] - }, - { - "parentPathPattern": "/drive/root:/abc/**" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-onedrive-content-extraction] -===== Content Extraction - -See <>. - -[discrete#es-connectors-onedrive-known-issues] -===== Known issues - -* *Enabling document-level security impacts performance.* -+ -Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. This impacts the speed at which your content can be retrieved. - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-onedrive-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-onedrive-security] -===== Security - -See <>. -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-onedrive-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-onedrive-client-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. - -This self-managed connector is compatible with Elastic versions *8.10.0+*. - -To use this connector, satisfy all <>. - -[discrete#es-connectors-onedrive-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-onedrive-client-usage] -===== Usage -For additional operations, see <>. 
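-
-If you prefer to script the setup rather than use the UI, the connector record for this data source can also be created through the {ref}/connector-apis.html[Connector APIs]. The following is a minimal sketch only; the connector ID, index name, Elasticsearch endpoint and API key are placeholder values:
-
-[source,shell]
-----
-# Create a connector record of service type "onedrive" attached to a content index.
-curl -X PUT "${ES_URL}/_connector/my-onedrive-connector" \
-  -H "Authorization: ApiKey ${ES_API_KEY}" \
-  -H "Content-Type: application/json" \
-  -d '{
-    "index_name": "search-onedrive",
-    "name": "My OneDrive connector",
-    "service_type": "onedrive"
-  }'
-----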
- -[discrete#es-connectors-onedrive-client-usage-connection] -====== Connecting to OneDrive - -To connect to OneDrive you need to https://learn.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal[create an Azure Active Directory application and service principal] that can access resources. - -Follow these steps: - -. Go to the https://portal.azure.com[Azure portal] and sign in with your Azure account. -. Navigate to the *Azure Active Directory* service. -. Select *App registrations* from the left-hand menu. -. Click on the *New registration* button to register a new application. -. Provide a name for your app, and optionally select the supported account types (e.g., single tenant, multi-tenant). -. Click on the *Register* button to create the app registration. -. After the registration is complete, you will be redirected to the app's overview page. Take note of the *Application (client) ID* value, as you'll need it later. -. Scroll down to the *API permissions* section and click on the *Add a permission* button. -. In the *Request API permissions* pane, select *Microsoft Graph* as the API. -. Choose the application permissions and select the following permissions under the *Application* tab: `User.Read.All`, `File.Read.All` -. Click on the *Add permissions* button to add the selected permissions to your app. -Finally, click on the *Grant admin consent* button to grant the required permissions to the app. This step requires administrative privileges. **NOTE**: If you are not an admin, you need to request the Admin to grant consent via their Azure Portal. -. Click on *Certificates & Secrets* tab. Go to Client Secrets. Generate a new client secret and keep a note of the string present under `Value` column. - -[discrete#es-connectors-onedrive-client-docker] -===== Deployment using Docker - -Self-managed connectors are run on your own infrastructure. - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-onedrive-client-usage-configuration] -===== Configuration - -The following configuration fields are *required*: - -`client_id`:: -Azure application Client ID, unique identifier for your Azure Application, found on the app's overview page. Example: -* `ab123453-12a2-100a-1123-93fd09d67394` - -`client_secret`:: -Azure application Client Secret, string value that the application uses to prove its identity when requesting a token. Available under the `Certificates & Secrets` tab of your Azure application menu. Example: -* `eyav1~12aBadIg6SL-STDfg102eBfCGkbKBq_Ddyu` - -`tenant_id`:: -Azure application Tenant ID: unique identifier of your Azure Active Directory instance. Example: -* `123a1b23-12a3-45b6-7c8d-fc931cfb448d` - -`retry_count`:: -The number of retry attempts after failed request to OneDrive. Default value is `3`. - -`use_document_level_security`:: -Toggle to enable <>. -When enabled: -* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -* Access control syncs will fetch users' access control lists and store them in a separate index. -+ -[WARNING] -==== -Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. -This impacts the speed at which your content can be retrieved. -==== - -`use_text_extraction_service`:: -Requires a separate deployment of the <>. -Requires that ingest pipeline settings disable text extraction. -Default value is `False`. 
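-
-These fields are typically set in Kibana after the connector is created. As an illustrative sketch only, they can also be supplied over the connector configuration API (`_connector/<connector_id>/_configuration`); the connector ID, endpoint, API key and client secret below are placeholders, while the Client ID and Tenant ID values reuse the examples from the list above. Depending on your Elastic version, the API may expect the full `configuration` object rather than a flat `values` map:
-
-[source,shell]
-----
-# Set the OneDrive-specific configuration values on an existing connector record.
-curl -X PUT "${ES_URL}/_connector/my-onedrive-connector/_configuration" \
-  -H "Authorization: ApiKey ${ES_API_KEY}" \
-  -H "Content-Type: application/json" \
-  -d '{
-    "values": {
-      "client_id": "ab123453-12a2-100a-1123-93fd09d67394",
-      "client_secret": "<azure-client-secret-value>",
-      "tenant_id": "123a1b23-12a3-45b6-7c8d-fc931cfb448d",
-      "retry_count": 3,
-      "use_document_level_security": false
-    }
-  }'
-----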
- -[discrete#es-connectors-onedrive-client-usage-content-extraction] -===== Content Extraction - -Refer to <> for more details. - -[discrete#es-connectors-onedrive-client-documents-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Files* -** Includes metadata such as file name, path, size, content, etc. -* *Folders* - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. -* Permissions are not synced by default. -You must first enable <>. -Otherwise, *all documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-onedrive-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-onedrive-client-dls] -===== Document level security - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. This feature is available by default for the OneDrive connector. -See <> for how to enable DLS for this connector. - -Refer to <> for more details about this feature. - -[NOTE] -==== -Refer to <> to learn how to ingest data with DLS enabled, when building a search application. -==== - -[discrete#es-connectors-onedrive-client-documents-sync-rules] -===== Sync rules - -_Basic_ sync rules are identical for all connectors and are available by default. -For more information read <>. - -[discrete#es-connectors-onedrive-client-sync-rules-advanced] -====== Advanced sync rules - -This connector supports <> for remote filtering. -These rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -Here are a few examples of advanced sync rules for this connector. - -[discrete#es-connectors-onedrive-client-sync-rules-advanced-examples-1] -*Example 1* - -This rule skips indexing for files with `.xlsx` and `.docx` extensions. -All other files and folders will be indexed. - -[source,js] ----- -[ - { - "skipFilesWithExtensions": [".xlsx" , ".docx"] - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-onedrive-client-sync-rules-advanced-examples-2] -*Example 2* - -This rule focuses on indexing files and folders owned by `user1-domain@onmicrosoft.com` and `user2-domain@onmicrosoft.com` but excludes files with `.py` extension. - -[source,js] ----- -[ - { - "owners": ["user1-domain@onmicrosoft.com", "user2-domain@onmicrosoft.com"], - "skipFilesWithExtensions": [".py"] - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-onedrive-client-sync-rules-advanced-examples-3] -*Example 3* - -This rule indexes only the files and folders directly inside the root folder, excluding any `.md` files. - -[source,js] ----- -[ - { - "skipFilesWithExtensions": [".md"], - "parentPathPattern": "/drive/root:" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-onedrive-client-sync-rules-advanced-examples-4] -*Example 4* - -This rule indexes files and folders owned by `user1-domain@onmicrosoft.com` and `user3-domain@onmicrosoft.com` that are directly inside the `abc` folder, which is a subfolder of any folder under the `hello` directory in the root. Files with extensions `.pdf` and `.py` are excluded. 
- -[source,js] ----- -[ - { - "owners": ["user1-domain@onmicrosoft.com", "user3-domain@onmicrosoft.com"], - "skipFilesWithExtensions": [".pdf", ".py"], - "parentPathPattern": "/drive/root:/hello/**/abc" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-onedrive-client-sync-rules-advanced-examples-5] -*Example 5* - -This example contains two rules. -The first rule indexes all files and folders owned by `user1-domain@onmicrosoft.com` and `user2-domain@onmicrosoft.com`. -The second rule indexes files for all other users, but skips files with a `.py` extension. - -[source,js] ----- -[ - { - "owners": ["user1-domain@onmicrosoft.com", "user2-domain@onmicrosoft.com"] - }, - { - "skipFilesWithExtensions": [".py"] - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-onedrive-client-sync-rules-advanced-examples-6] -*Example 6* - -This example contains two rules. -The first rule indexes all files owned by `user1-domain@onmicrosoft.com` and `user2-domain@onmicrosoft.com`, excluding `.md` files. -The second rule indexes files and folders recursively inside the `abc` folder. - -[source,js] ----- -[ - { - "owners": ["user1-domain@onmicrosoft.com", "user2-domain@onmicrosoft.com"], - "skipFilesWithExtensions": [".md"] - }, - { - "parentPathPattern": "/drive/root:/abc/**" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-onedrive-client-content-extraction] -===== Content Extraction - -See <>. - -[discrete#es-connectors-onedrive-client-connector-client-operations] -===== Self-managed connector operations - -[discrete#es-connectors-onedrive-client-testing] -===== End-to-end testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the GitHub connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=onedrive ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=onedrive DATA_SIZE=small ----- - -[discrete#es-connectors-onedrive-client-known-issues] -===== Known issues - -* *Enabling document-level security impacts performance.* -+ -Enabling DLS for your connector will cause a significant performance degradation, as the API calls to the data source required for this functionality are rate limited. This impacts the speed at which your content can be retrieved. - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-onedrive-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-onedrive-client-security] -===== Security - -See <>. -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-opentext-documentum.asciidoc b/docs/reference/connector/docs/connectors-opentext-documentum.asciidoc deleted file mode 100644 index e320062240428..0000000000000 --- a/docs/reference/connector/docs/connectors-opentext-documentum.asciidoc +++ /dev/null @@ -1,162 +0,0 @@ -[#es-connectors-opentext] -=== Elastic OpenText Documentum connector reference -++++ -OpenText Documentum -++++ - -// Attributes used in this file -:service-name: OpenText Documentum -:service-name-stub: opentext_documentum - -[WARNING] -==== -This connector is an *example connector* that serves as a building block for customizations and is subject to change. 
-Its source code currently lives on a https://github.com/elastic/connectors/blob/opentext-connector-backup/connectors/sources/opentext_documentum.py[feature branch] and is not yet part of the main Elastic Connectors codebase.
-The design and code are less mature than supported features and are provided as-is with no warranties.
-This connector is not subject to the support SLA of supported features.
-====
-
-The Elastic OpenText Documentum connector is written in Python using the https://github.com/elastic/connectors/tree/main?tab=readme-ov-file#connector-framework[Elastic connector framework]. View the https://github.com/elastic/connectors/blob/opentext-connector-backup/connectors/sources/opentext_documentum.py[source code] for this example connector.
-
-[discrete#es-connectors-opentext-documentum-connector-availability-and-prerequisites]
-==== Availability and prerequisites
-
-This *example connector* was introduced in Elastic *8.14.0*, available as a *self-managed* connector on a feature branch, for testing and development purposes only.
-
-To use this connector, satisfy all <>.
-Importantly, you must deploy the connectors service on your own infrastructure.
-You have two deployment options:
-
-* <>. Use this option if you're comfortable working with Python and want to iterate quickly locally.
-* <>. Use this option if you want to deploy the connectors to a server, or use a container orchestration platform.
-
-[discrete#es-connectors-opentext-documentum-connector-usage]
-==== Usage
-
-To set up this connector in the UI, select the *OpenText Documentum* tile when creating a new connector under *Search -> Connectors*.
-
-If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs].
-
-For additional operations, see <>.
-
-[discrete#es-connectors-opentext-documentum-connector-connecting-to-opentext-documentum]
-==== Connecting to OpenText Documentum
-
-Basic authentication is used to connect to OpenText Documentum.
-
-[discrete#es-connectors-opentext-documentum-connector-configuration]
-==== Configuration
-
-[discrete#es-connectors-opentext-documentum-connector-configure-opentext-documentum-connector]
-===== Configure OpenText Documentum connector
-
-Note the following configuration fields:
-
-`OpenText Documentum host url` (required)::
-The domain where OpenText Documentum is hosted.
-Example: `https://192.158.1.38:2099/`
-
-`Username` (required)::
-The username of the account to connect to OpenText Documentum.
-
-`Password` (required)::
-The password of the account to connect to OpenText Documentum.
-
-`Repositories` (optional)::
-Comma-separated list of repositories to fetch data from OpenText Documentum. If the value is `*`, the connector will fetch data from all repositories present in the configured user’s account.
-+
-Default value is `*`.
-+
-Examples:
-+
-* `elastic`, `kibana`
-* `*`
-
-`Enable SSL` (optional)::
-Enable SSL for the OpenText Documentum instance.
-
-`SSL Certificate` (Required if SSL is enabled)::
-SSL certificate for the OpenText Documentum instance.
-Example:
-+
-```
------BEGIN CERTIFICATE-----
-MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT
-...
-7RhLQyWn2u00L7/9Omw=
------END CERTIFICATE-----
-```
-
-[discrete#es-connectors-opentext-documentum-connector-content-extraction]
-===== Content Extraction
-
-Refer to <>.
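-
-Before running a first sync, it can be useful to confirm that the configured host URL and credentials are reachable from the machine running the connectors service. The following is a hedged sketch, assuming OpenText Documentum REST Services is deployed under the default `dctm-rest` context; the host reuses the placeholder from the configuration example above, and the user name and password are placeholders:
-
-[source,shell]
-----
-# Should return the list of repositories visible to the configured user.
-# For a self-signed certificate during testing, add --cacert <path-to-ca> (or -k to skip verification).
-curl -u "documentum-user:${DCTM_PASSWORD}" "https://192.158.1.38:2099/dctm-rest/repositories"
-----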
- -[discrete#es-connectors-opentext-documentum-connector-documents-and-syncs] -==== Documents and syncs - -The connector syncs the following objects and entities: - -* *Repositories* -* *Cabinets* -* *Files & Folders* - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to the destination Elasticsearch index. -==== - -[discrete#es-connectors-opentext-documentum-connector-sync-types] -===== Sync types -<> are supported by default for all connectors. - -<> are not available for this connector in the present version. - -[discrete#es-connectors-opentext-documentum-connector-sync-rules] -==== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are not available for this connector in the present version. - -[discrete#es-connectors-opentext-documentum-connector-connector-client-operations] -==== Connector Client operations - -[discrete#es-connectors-opentext-documentum-connector-end-to-end-testing] -===== End-to-end Testing - -The connector framework enables operators to run functional tests against a real data source, using Docker Compose. -You don't need a running Elasticsearch instance or OpenText Documentum source to run this test. - -Refer to <> for more details. - -To perform E2E testing for the OpenText Documentum connector, run the following command: - -```shell -$ make ftest NAME=opentext_documentum -``` -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=opentext_documentum DATA_SIZE=small ----- - -By default, `DATA_SIZE=MEDIUM`. - - -[discrete#es-connectors-opentext-documentum-connector-known-issues] -==== Known issues - -* There are no known issues for this connector. Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-opentext-documentum-connector-troubleshooting] -==== Troubleshooting - -See <>. - -[discrete#es-connectors-opentext-documentum-connector-security] -==== Security - -See <>. diff --git a/docs/reference/connector/docs/connectors-oracle.asciidoc b/docs/reference/connector/docs/connectors-oracle.asciidoc deleted file mode 100644 index 839a92985a7d5..0000000000000 --- a/docs/reference/connector/docs/connectors-oracle.asciidoc +++ /dev/null @@ -1,395 +0,0 @@ -[#es-connectors-oracle] -=== Elastic Oracle connector reference -++++ -Oracle -++++ - -// Attributes used in this file: -:service-name: Oracle -:service-name-stub: oracle - - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-oracle-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-oracle-availability-prerequisites] -===== Availability and prerequisites - -This connector is available natively in Elastic Cloud as of *8.12.0*. -To use this connector, satisfy all <>. - -[discrete#es-connectors-oracle-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-oracle-usage] -===== Usage - -To use this connector as a *managed connector*, see <>. - -The database user requires `CONNECT` and `DBA` privileges and must be the owner of the tables to be indexed. 
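-
-For reference, here is a minimal sketch of preparing such a database user from an administrative SQL*Plus session. The user name, password and connection string are placeholders; grant privileges in line with your own security policies:
-
-[source,shell]
-----
-# The tables to be synced must be created under (owned by) this user.
-sqlplus sys/"${SYS_PASSWORD}"@//localhost:1521/ORCLCDB as sysdba <<'SQL'
-CREATE USER connector_user IDENTIFIED BY "choose-a-strong-password";
-GRANT CONNECT, DBA TO connector_user;
-EXIT
-SQL
-----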
- -[discrete#es-connectors-oracle-usage-secure-connection] -====== Secure connection - -To set up a secure connection the Oracle service must be installed on the system where the connector is running. - -Follow these steps: - -. Set the `oracle_home` parameter to your Oracle home directory. -If configuration files are not at the default location, set the `wallet_configuration_path` parameter. -. Create a directory to store the wallet. -+ -[source,shell] ----- -$ mkdir $ORACLE_HOME/ssl_wallet ----- -. Create file named `sqlnet.ora` at `$ORACLE_HOME/network/admin` and add the following content: -+ -[source,shell] ----- -WALLET_LOCATION = (SOURCE = (METHOD = FILE) (METHOD_DATA = (DIRECTORY = $ORACLE_HOME/ssl_wallet))) -SSL_CLIENT_AUTHENTICATION = FALSE -SSL_VERSION = 1.0 -SSL_CIPHER_SUITES = (SSL_RSA_WITH_AES_256_CBC_SHA) -SSL_SERVER_DN_MATCH = ON ----- -+ -. Run the following commands to create a wallet and attach an SSL certificate. -Replace the file name with your file name. -+ -[source,shell] ----- -$ orapki wallet create -wallet path-to-oracle-home/ssl_wallet -auto_login_only -$ orapki wallet add -wallet path-to-oracle-home/ssl_wallet -trusted_cert -cert path-to-oracle-home/ssl_wallet/root_ca.pem -auto_login_only ----- - -For more information, refer to this https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Appendix.Oracle.Options.SSL.html[Amazon RDS documentation^] about Oracle SSL. -Oracle docs: https://docs.oracle.com/database/121/DBSEG/asossl.htm#DBSEG070. -// TODO: check whether this link is appropriate - -For additional operations, see <>. - -[discrete#es-connectors-oracle-compatability] -===== Compatibility - -This connector is compatible with Oracle Database versions *18c*, *19c* and *21c*. - -[discrete#es-connectors-oracle-configuration] -===== Configuration - -Use the following configuration fields to set up the connector: - -Connection source:: -Dropdown to determine the Oracle Source Connection: `Service Name` or `SID`. Default value is `SID`. Select 'Service Name' option if connecting to a pluggable database. - -SID:: -SID of the database. - -Service name:: -Service name for the database. - -Host:: -The IP address or hostname of the Oracle database server. -Default value is `127.0.0.1`. - -Port:: -Port number of the Oracle database server. - -Username:: -Username to use to connect to the Oracle database server. - -Password:: -Password to use to connect to the Oracle database server. - -Comma-separated list of tables:: -Comma-separated list of tables to monitor for changes. -Default value is `*`. -Examples: -* `TABLE_1, TABLE_2` -* `*` - -[discrete#es-connectors-oracle-documents-syncs] -===== Documents and syncs - -* Tables with no primary key defined are skipped. -* If the table's system change number (SCN) value is not between the `min(SCN)` and `max(SCN)` values of the `SMON_SCN_TIME` table, the connector will not be able to retrieve the most recently updated time. - Data will therefore index in every sync. - For more details refer to the following https://community.oracle.com/tech/apps-infra/discussion/4076446/show-error-about-ora-08181-specified-number-is-not-a-valid-system-change-number-when-using-scn-t[discussion thread^]. -* The `sys` user is not supported, as it contains 1000+ system tables. If you need to work with the `sys` user, use either `sysdba` or `sysoper` and configure this as the username. - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. 
-**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#es-connectors-oracle-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are not available for this connector in the present version. -Currently, filtering is controlled by ingest pipelines. - -[discrete#es-connectors-oracle-known-issues] -===== Known issues - -There are no known issues for this connector. - -See <> for any issues affecting all connectors. - -[discrete#es-connectors-oracle-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-oracle-security] -===== Security - -See <>. - -[discrete#es-connectors-oracle-source] -===== Framework and source - -This connector is built with the {connectors-python}[Elastic connector framework^]. - -This connector uses the https://github.com/elastic/connectors/blob/{branch}/connectors/sources/generic_database.py[generic database connector source code^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -View {connectors-python}/connectors/sources/oracle.py[additional code specific to this data source^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-oracle-connector-client-reference] -==== *Self-managed connector reference* - -.View *self-managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-oracle-client-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. -This self-managed connector is compatible with Elastic versions *8.6.0+*. -To use this connector, satisfy all <>. - -[discrete#es-connectors-oracle-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-oracle-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, see <>. - -The database user requires `CONNECT` and `DBA` privileges and must be the owner of the tables to be indexed. - -[discrete#es-connectors-oracle-client-usage-secure-connection] -====== Secure connection - -To set up a secure connection the Oracle service must be installed on the system where the connector is running. - -Follow these steps: - -. Set the `oracle_home` parameter to your Oracle home directory. -If configuration files are not at the default location, set the `wallet_configuration_path` parameter. -. Create a directory to store the wallet. -+ -[source,shell] ----- -$ mkdir $ORACLE_HOME/ssl_wallet ----- -. Create file named `sqlnet.ora` at `$ORACLE_HOME/network/admin` and add the following content: -+ -[source,shell] ----- -WALLET_LOCATION = (SOURCE = (METHOD = FILE) (METHOD_DATA = (DIRECTORY = $ORACLE_HOME/ssl_wallet))) -SSL_CLIENT_AUTHENTICATION = FALSE -SSL_VERSION = 1.0 -SSL_CIPHER_SUITES = (SSL_RSA_WITH_AES_256_CBC_SHA) -SSL_SERVER_DN_MATCH = ON ----- -+ -. Run the following commands to create a wallet and attach an SSL certificate. -Replace the file name with your file name. 
-+ -[source,shell] ----- -$ orapki wallet create -wallet path-to-oracle-home/ssl_wallet -auto_login_only -$ orapki wallet add -wallet path-to-oracle-home/ssl_wallet -trusted_cert -cert path-to-oracle-home/ssl_wallet/root_ca.pem -auto_login_only ----- - -For more information, refer to this https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Appendix.Oracle.Options.SSL.html[Amazon RDS documentation^] about Oracle SSL. -Oracle docs: https://docs.oracle.com/database/121/DBSEG/asossl.htm#DBSEG070. -// TODO: check whether this link is appropriate - -For additional operations, see <>. - -[discrete#es-connectors-oracle-client-compatability] -===== Compatibility - -Oracle Database versions *18c*, *19c* and *21c* are compatible with Elastic connector frameworks. - -[discrete#es-connectors-oracle-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, these fields will use the default configuration set in the https://github.com/elastic/connectors/blob/{branch}/connectors/sources/generic_database.py[connector source code^]. -Note that this data source uses the `generic_database.py` connector source code. -Refer to https://github.com/elastic/connectors/blob/{branch}/connectors/sources/oracle.py[`oracle.py`^] for additional code, specific to this data source. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, users will be able to update these values in Kibana. -==== - -Use the following configuration fields to set up the connector: - -`connection_source`:: -Determines the Oracle source: Service Name or SID. Default value is SID. Select 'Service Name' if connecting to a pluggable database. - -`sid`:: -SID of the database. - -`service_name`:: -Service name for the database. - -`host`:: -The IP address or hostname of the Oracle database server. -Default value is `127.0.0.1`. - -`port`:: -Port number of the Oracle database server. - -`username`:: -Username to use to connect to the Oracle database server. - -`password`:: -Password to use to connect to the Oracle database server. - -`tables`:: -Comma-separated list of tables to monitor for changes. -Default value is `*`. -Examples: -* `TABLE_1, TABLE_2` -* `*` - -`oracle_protocol`:: -Protocol which the connector uses to establish a connection. -Default value is `TCP`. -For secure connections, use `TCPS`. - -`oracle_home`:: -Path to Oracle home directory to run connector in thick mode for secured connection. -For unsecured connections, keep this field empty. - -`wallet_configuration_path`:: -Path to SSL Wallet configuration files. - -`fetch_size`:: -Number of rows to fetch per request. -Default value is `50`. - -`retry_count`:: -Number of retry attempts after failed request to Oracle Database. -Default value is `3`. - -[discrete#es-connectors-oracle-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-oracle-client-documents-syncs] -===== Documents and syncs - -* Tables with no primary key defined are skipped. -* If the table's system change number (SCN) value is not between the `min(SCN)` and `max(SCN)` values of the `SMON_SCN_TIME` table, the connector will not be able to retrieve the most recently updated time. - Data will therefore index in every sync. - For more details refer to the following https://community.oracle.com/tech/apps-infra/discussion/4076446/show-error-about-ora-08181-specified-number-is-not-a-valid-system-change-number-when-using-scn-t[discussion thread^]. 
-* The `sys` user is not supported, as it contains 1000+ system tables. If you need to work with the `sys` user, use either `sysdba` or `sysoper` and configure this as the username. - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. -**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#es-connectors-oracle-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are not available for this connector in the present version. -Currently, filtering is controlled by ingest pipelines. - -[discrete#es-connectors-oracle-client-operations] -===== Self-managed connector operations - -[discrete#es-connectors-oracle-client-operations-testing] -====== End-to-end testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To execute a functional test for the Oracle connector, run the following command: - -[source,shell] ----- -make ftest NAME=oracle ----- - -By default, this will use a medium-sized dataset. -To make the test faster add the `DATA_SIZE=small` argument: - -[source,shell] ----- -make ftest NAME=oracle DATA_SIZE=small ----- - -[discrete#es-connectors-oracle-client-known-issues] -===== Known issues - -There are no known issues for this connector. - -See <> for any issues affecting all connectors. - -[discrete#es-connectors-oracle-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-oracle-client-security] -===== Security - -See <>. - -[discrete#es-connectors-oracle-client-source] -===== Framework and source - -This connector is built with the {connectors-python}[Elastic connector framework^]. - -This connector uses the https://github.com/elastic/connectors/blob/{branch}/connectors/sources/generic_database.py[generic database connector source code^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -View {connectors-python}/connectors/sources/oracle.py[additional code specific to this data source^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-outlook.asciidoc b/docs/reference/connector/docs/connectors-outlook.asciidoc deleted file mode 100644 index e032fb243dfa9..0000000000000 --- a/docs/reference/connector/docs/connectors-outlook.asciidoc +++ /dev/null @@ -1,446 +0,0 @@ -[#es-connectors-outlook] -=== Elastic Outlook connector reference -++++ -Outlook -++++ -// Attributes used in this file -:service-name: Outlook -:service-name-stub: outlook - -The Elastic Outlook connector is built with the Elastic connector framework and is available as a self-managed <>. - - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-outlook-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-outlook-availability-and-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* (managed service) in Elastic Cloud. - -This connector is compatible with Elastic versions *8.13.0+*. - -To use this connector, satisfy all <>. 
- -[discrete#es-connectors-outlook-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-outlook-usage] -===== Usage - -To use this connector as a managed connector in Elastic Cloud, use the *Connectors* workflow in the Kibana UI. - -To create a new {service-name} connector: - -. Navigate to *Search -> Connectors* page in the Kibana UI. -. Select the *New Native Connector* button. -. Select the *{service-name}* connector. - -For additional operations, see <>. - -[discrete#es-connectors-outlook-connecting-to-outlook] -===== Connecting to Outlook - -Outlook connector supports both cloud (Office365 Outlook) and on-premises (Exchange Server) platforms. - -[discrete#es-connectors-outlook-connect-to-exchange-server] -====== Connect to Exchange Server - -In order to connect to Exchange server, the connector fetches Active Directory users with the help of `ldap3` python library. - -[discrete#es-connectors-outlook-connect-to-office365-outlook-outlook-cloud] -====== Connect to Office365 Outlook (Outlook Cloud) - -To integrate with the Outlook connector using Azure, follow these steps to create and configure an Azure application: - -1. Navigate to the https://portal.azure.com/[Azure Portal] and log in using your credentials. -2. Click on *App registrations* to register a new application. -3. Navigate to the *Overview* tab. Make a note of the `Client ID` and `Tenant ID`. -4. Click on the *Certificates & secrets* tab and create a new client secret. Keep this secret handy. -5. Go to the *API permissions* tab. - * Click on "Add permissions." - * Choose "APIs my organization uses." - * Search for and select "Office 365 Exchange Online." - * Add the `full_access_as_app` application permission. - -You can now use the Client ID, Tenant ID, and Client Secret you've noted to configure the Outlook connector. - -[discrete#es-connectors-outlook-configuration] -===== Configuration - -Outlook data source (required):: -Dropdown to determine Outlook platform type: `outlook_cloud` or `outlook_server`. Default value is `outlook_cloud`. - -Tenant ID:: -Required if data source is `outlook_cloud`. -The Tenant ID for the Azure account hosting the Outlook instance. - -Client ID:: -Required if data source is `outlook_cloud`. -The Client ID to authenticate with Outlook instance. - -Client Secret Value:: -Required if data source is `outlook_cloud`. -The Client Secret value to authenticate with Outlook instance. - -Exchange Server:: -Required if data source is `outlook_server`. -IP address to connect with Exchange server. Example: `127.0.0.1` - -Active Directory Server:: -Required if data source is `outlook_server`. -IP address to fetch users from Exchange Active Directory to fetch data. Example: `127.0.0.1` - -Exchange server username:: -Required if data source is `outlook_server`. -Username to authenticate with Exchange server. - -Exchange server password:: -Required if data source is `outlook_server`. -Password to authenticate with Exchange server. - -Exchange server domain name:: -Required if data source is `outlook_server`. -Domain name for Exchange server users such as `gmail.com` or `exchange.local`. - -Enable SSL:: -Whether SSL verification will be enabled. Default value is `False`. -*Note:* This configuration is applicable for `Outlook Server` only. - -SSL certificate:: -Required if ssl is enabled. -Content of SSL certificate. 
Example certificate: -+ -[text] -[source, txt] ----- ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -... -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- ----- -+ -*Note:* This configuration is applicable for `Outlook Server` only. - -Document level security:: -Toggle to enable <>. -When enabled: -+ -* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -* Access control syncs fetch users' access control lists and store them in a separate index. - -[discrete#es-connectors-outlook-content-extraction] -===== Content Extraction - -Refer to <>. - -[discrete#es-connectors-outlook-documents-and-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Mails* -** *Inbox Mails* -** *Sent Mails* -** *Archive Mails* -** *Junk Mails* -* *Contacts* -* *Calendar Events* -* *Tasks* -* *Attachments* -** *Mail Attachments* -** *Task Attachments* -** *Calendar Attachments* - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-outlook-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-outlook-document-level-security] -===== Document level security - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. -Refer to <> on this page for how to enable DLS for this connector. - -[NOTE] -==== -Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. -The example uses SharePoint Online as the data source, but the same steps apply to every connector. -==== - -[discrete#es-connectors-outlook-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-outlook-advanced-sync-rules] -===== Advanced Sync Rules - -Advanced sync rules are not available for this connector in the present version. - -[discrete#es-connectors-outlook-connector-client-operations] -===== Connector Client operations - -[discrete#es-connectors-outlook-end-to-end-testing] -====== End-to-end Testing - -*Note:* End-to-end testing is not available in the current version of the connector. - -[discrete#es-connectors-outlook-known-issues] -====== Known issues - -There are currently no known issues for this connector. -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-outlook-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-outlook-security] -===== Security - -See <>. - -[discrete#es-connectors-outlook-source] -===== Framework and source - -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/outlook.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). 
- - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-outlook-connector-client-reference] -==== *Self-managed connector reference* - -.View *self-managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-outlook-client-availability-and-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. To use this connector, satisfy all <>. - -[discrete#es-connectors-outlook-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-outlook-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, use the *Outlook* tile from the connectors list OR *Customized connector* workflow. - -For additional operations, see <>. - -[discrete#es-connectors-outlook-client-connecting-to-outlook] -===== Connecting to Outlook - -Outlook connector supports both cloud (Office365 Outlook) and on-premises (Exchange Server) platforms. - -[discrete#es-connectors-outlook-client-connect-to-exchange-server] -====== Connect to Exchange Server - -In order to connect to Exchange server, the connector fetches Active Directory users with the help of `ldap3` python library. - -[discrete#es-connectors-outlook-client-connect-to-office365-outlook-outlook-cloud] -====== Connect to Office365 Outlook (Outlook Cloud) - -To integrate with the Outlook connector using Azure, follow these steps to create and configure an Azure application: - -1. Navigate to the https://portal.azure.com/[Azure Portal] and log in using your credentials. -2. Click on *App registrations* to register a new application. -3. Navigate to the *Overview* tab. Make a note of the `Client ID` and `Tenant ID`. -4. Click on the *Certificates & secrets* tab and create a new client secret. Keep this secret handy. -5. Go to the *API permissions* tab. - * Click on "Add permissions." - * Choose "APIs my organization uses." - * Search for and select "Office 365 Exchange Online." - * Add the `full_access_as_app` application permission. - -You can now use the Client ID, Tenant ID, and Client Secret you've noted to configure the Outlook connector. - -[discrete#es-connectors-outlook-client-configuration] -===== Configuration - -`data_source`:: (required) -Dropdown to determine Outlook platform type: `outlook_cloud` or `outlook_server`. Default value is `outlook_cloud`. - -`tenant_id`:: (required if data source is outlook_cloud) -The Tenant ID for the Azure account hosting the Outlook instance. - -`client_id`:: (required if data source is outlook_cloud) -The Client ID to authenticate with Outlook instance. - -`client_secret`:: (required if data source is outlook_cloud) -The Client Secret value to authenticate with Outlook instance. - -`exchange_server`:: (required if data source is outlook_server) -IP address to connect with Exchange server. Example: `127.0.0.1` - -`active_directory_server`:: (required if data source is outlook_server) -IP address to fetch users from Exchange Active Directory to fetch data. Example: `127.0.0.1` - -`username`:: (required if data source is outlook_server) -Username to authenticate with Exchange server. - -`password`:: (required if data source is outlook_server) -Password to authenticate with Exchange server. 
- -`domain`:: (required if data source is outlook_server) -Domain name for Exchange server users such as `gmail.com` or `exchange.local`. - -`ssl_enabled`:: -Whether SSL verification will be enabled. Default value is `False`. -*Note:* This configuration is applicable for `Outlook Server` only. - -`ssl_ca`:: (required if ssl is enabled) -Content of SSL certificate. Example certificate: -+ -[text] -[source, txt] ----- ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -... -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- ----- - -`use_text_extraction_service`:: -Use <>. -Default value is `False`. - -`document_level_security`:: -Toggle to enable <>. -When enabled: -+ -* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -* Access control syncs fetch users' access control lists and store them in a separate index. - -*Note:* This configuration is applicable for `Outlook Server` only. - -[discrete#es-connectors-outlook-client-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-outlook-client-content-extraction] -===== Content Extraction - -Refer to <>. - -[discrete#es-connectors-outlook-client-documents-and-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Mails* -** *Inbox Mails* -** *Sent Mails* -** *Archive Mails* -** *Junk Mails* -* *Contacts* -* *Calendar Events* -* *Tasks* -* *Attachments* -** *Mail Attachments* -** *Task Attachments* -** *Calendar Attachments* - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. -* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-outlook-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-outlook-client-document-level-security] -===== Document level security - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. -Refer to <> on this page for how to enable DLS for this connector. - -[NOTE] -==== -Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. -The example uses SharePoint Online as the data source, but the same steps apply to every connector. -==== - -[discrete#es-connectors-outlook-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-outlook-client-advanced-sync-rules] -===== Advanced Sync Rules - -Advanced sync rules are not available for this connector in the present version. - -[discrete#es-connectors-outlook-client-connector-client-operations] -===== Connector Client operations - -[discrete#es-connectors-outlook-client-end-to-end-testing] -====== End-to-end Testing - -*Note:* End-to-end testing is not available in the current version of the connector. - -[discrete#es-connectors-outlook-client-known-issues] -====== Known issues - -There are currently no known issues for this connector. -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-outlook-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-outlook-client-security] -===== Security - -See <>. 
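Because the `ssl_ca` field described in the configuration above takes the certificate content itself rather than a file path, it can be worth confirming that the PEM text parses before handing it to the connector. The sketch below uses only Python's standard `ssl` module; the certificate body is a placeholder.

[source,python]
----
import ssl

# Placeholder: paste the full PEM text you intend to use for the `ssl_ca` field.
pem_text = """-----BEGIN CERTIFICATE-----
MIID...replace with the real base64 body...
-----END CERTIFICATE-----"""

# `cadata` loads CA material from an in-memory string; malformed PEM raises ssl.SSLError.
context = ssl.create_default_context(cadata=pem_text)
print(context.cert_store_stats())
----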
- -[discrete#es-connectors-outlook-client-source] -===== Framework and source - -This connector is included in the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/outlook.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-postgresql.asciidoc b/docs/reference/connector/docs/connectors-postgresql.asciidoc deleted file mode 100644 index aa6cb7f29e633..0000000000000 --- a/docs/reference/connector/docs/connectors-postgresql.asciidoc +++ /dev/null @@ -1,644 +0,0 @@ -[#es-connectors-postgresql] -=== Elastic PostgreSQL connector reference -++++ -PostgreSQL -++++ -// Attributes used in this file -:service-name: PostgreSQL -:service-name-stub: postgresql - -The _Elastic PostgreSQL connector_ is a connector for https://www.postgresql.org[PostgreSQL^]. -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -This connector uses the https://github.com/elastic/connectors/blob/{branch}/connectors/sources/generic_database.py[generic database connector source code^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). -View the specific {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - - -.Choose your connector reference -******************************* -Are you using an Elastic managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. -******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#connectors-postgresql-native-connector-reference] -=== *Elastic managed connector (Elastic Cloud)* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#connectors-postgresql-availability-prerequisites] -==== Availability and prerequisites - -This connector is available as an *Elastic managed connector* in Elastic versions *8.8.0 and later*. -To use this connector natively in Elastic Cloud, satisfy all <>. - -[discrete#connectors-postgresql-create-native-connector] -==== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#connectors-postgresql-usage] -==== Usage - -To use this connector as an *Elastic managed connector*, use the *Connector* workflow. -See <>. - -[TIP] -==== -Users must set `track_commit_timestamp` to `on`. -To do this, run `ALTER SYSTEM SET track_commit_timestamp = on;` in PostgreSQL server. -==== - -For additional operations, see <>. - -[NOTE] -==== -For an end-to-end example of the connector client workflow, see <>. -==== - -[discrete#connectors-postgresql-compatibility] -==== Compatibility - -PostgreSQL versions 11 to 15 are compatible with the Elastic connector. - -[discrete#connectors-postgresql-configuration] -==== Configuration - -Set the following configuration fields: - -Host:: -The server host address where the PostgreSQL instance is hosted. -Examples: -+ -* `192.158.1.38` -* `demo.instance.demo-region.demo.service.com` - -Port:: -The port where the PostgreSQL instance is hosted. -Examples: -+ -* `5432` (default) - -Username:: -The username of the PostgreSQL account.
- -Password:: -The password of the PostgreSQL account. - -Database:: -Name of the PostgreSQL database. -Examples: -+ -* `employee_database` -* `customer_database` - -Schema:: -The schema of the PostgreSQL database. - -Comma-separated List of Tables:: -A list of tables separated by commas. -The PostgreSQL connector will fetch data from all tables present in the configured database, if the value is `*` . -Default value is `*`. -Examples: -+ -* `table_1, table_2` -* `*` -+ -[WARNING] -==== -This field can be bypassed when using advanced sync rules. -==== - -Enable SSL:: -Toggle to enable SSL verification. -Disabled by default. - -SSL Certificate:: -Content of SSL certificate. -If SSL is disabled, the `ssl_ca` value will be ignored. -+ -.*Expand* to see an example certificate -[%collapsible] -==== -``` ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -AlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHROb2RlMDExFjAUBgNV -BAsTDURlZmF1bHRDZWxsMDExGTAXBgNVBAsTEFJvb3QgQ2VydGlmaWNhdGUxEjAQ -BgNVBAMTCWxvY2FsaG9zdDAeFw0yMTEyMTQyMjA3MTZaFw0yMjEyMTQyMjA3MTZa -MF8xCzAJBgNVBAYTAlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHRO -b2RlMDExFjAUBgNVBAsTDURlZmF1bHRDZWxsMDExEjAQBgNVBAMTCWxvY2FsaG9z -dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMv5HCsJZIpI5zCy+jXV -z6lmzNc9UcVSEEHn86h6zT6pxuY90TYeAhlZ9hZ+SCKn4OQ4GoDRZhLPTkYDt+wW -CV3NTIy9uCGUSJ6xjCKoxClJmgSQdg5m4HzwfY4ofoEZ5iZQ0Zmt62jGRWc0zuxj -hegnM+eO2reBJYu6Ypa9RPJdYJsmn1RNnC74IDY8Y95qn+WZj//UALCpYfX41hko -i7TWD9GKQO8SBmAxhjCDifOxVBokoxYrNdzESl0LXvnzEadeZTd9BfUtTaBHhx6t -njqqCPrbTY+3jAbZFd4RiERPnhLVKMytw5ot506BhPrUtpr2lusbN5svNXjuLeea -MMUCAwEAAaOBoDCBnTATBgNVHSMEDDAKgAhOatpLwvJFqjAdBgNVHSUEFjAUBggr -BgEFBQcDAQYIKwYBBQUHAwIwVAYDVR0RBE0wS4E+UHJvZmlsZVVVSUQ6QXBwU3J2 -MDEtQkFTRS05MDkzMzJjMC1iNmFiLTQ2OTMtYWI5NC01Mjc1ZDI1MmFmNDiCCWxv -Y2FsaG9zdDARBgNVHQ4ECgQITzqhA5sO8O4wDQYJKoZIhvcNAQELBQADggEBAKR0 -gY/BM69S6BDyWp5dxcpmZ9FS783FBbdUXjVtTkQno+oYURDrhCdsfTLYtqUlP4J4 -CHoskP+MwJjRIoKhPVQMv14Q4VC2J9coYXnePhFjE+6MaZbTjq9WaekGrpKkMaQA -iQt5b67jo7y63CZKIo9yBvs7sxODQzDn3wZwyux2vPegXSaTHR/rop/s/mPk3YTS -hQprs/IVtPoWU4/TsDN3gIlrAYGbcs29CAt5q9MfzkMmKsuDkTZD0ry42VjxjAmk -xw23l/k8RoD1wRWaDVbgpjwSzt+kl+vJE/ip2w3h69eEZ9wbo6scRO5lCO2JM4Pr -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- -``` -==== - -[discrete#connectors-postgresql-documents-syncs] -==== Documents and syncs - -* Tables must be owned by a PostgreSQL user. -* Tables with no primary key defined are skipped. -* To fetch the last updated time in PostgreSQL, `track_commit_timestamp` must be set to `on`. -Otherwise, all data will be indexed in every sync. - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. -**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#connectors-postgresql-sync-rules] -==== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#connectors-postgresql-sync-rules-advanced] -===== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -[discrete#connectors-postgresql-sync-rules-advanced-example-data] -====== Example data - -Here is some example data that will be used in the following examples. 
- -[discrete#connectors-postgresql-sync-rules-advanced-example-data-1] -*`employee` table* - -[cols="3*", options="header"] -|=== -| emp_id | name | age -| 3 | John | 28 -| 10 | Jane | 35 -| 14 | Alex | 22 -|=== - -[discrete#connectors-postgresql-sync-rules-advanced-example-2] -*`customer` table* - -[cols="3*", options="header"] -|=== -| c_id | name | age -| 2 | Elm | 24 -| 6 | Pine | 30 -| 9 | Oak | 34 -|=== - -[discrete#connectors-postgresql-sync-rules-advanced-examples] -====== Advanced sync rules examples - -[discrete#connectors-postgresql-sync-rules-advanced-examples-1] -*Multiple table queries* - -[source,js] ----- -[ - { - "tables": [ - "employee" - ], - "query": "SELECT * FROM employee" - }, - { - "tables": [ - "customer" - ], - "query": "SELECT * FROM customer" - } -] ----- -// NOTCONSOLE - -[discrete#connectors-postgresql-sync-rules-advanced-examples-1-id-columns] -*Multiple table queries with `id_columns`* - -In 8.15.0, we added a new optional `id_columns` field in our advanced sync rules for the PostgreSQL connector. -Use the `id_columns` field to ingest tables which do not have a primary key. Include the names of unique fields so that the connector can use them to generate unique IDs for documents. - -[source,js] ----- -[ - { - "tables": [ - "employee" - ], - "query": "SELECT * FROM employee", - "id_columns": ["emp_id"] - }, - { - "tables": [ - "customer" - ], - "query": "SELECT * FROM customer", - "id_columns": ["c_id"] - } -] ----- -// NOTCONSOLE - -This example uses the `id_columns` field to specify the unique fields `emp_id` and `c_id` for the `employee` and `customer` tables, respectively. - -[discrete#connectors-postgresql-sync-rules-advanced-examples-2] -*Filtering data with `WHERE` clause* - -[source,js] ----- -[ - { - "tables": ["employee"], - "query": "SELECT * FROM employee WHERE emp_id > 5" - } -] ----- -// NOTCONSOLE - -[discrete#connectors-postgresql-sync-rules-advanced-examples-3] -*`JOIN` operations* - -[source,js] ----- -[ - { - "tables": ["employee", "customer"], - "query": "SELECT * FROM employee INNER JOIN customer ON employee.emp_id = customer.c_id" - } -] ----- -// NOTCONSOLE - -[WARNING] -==== -When using advanced rules, a query can bypass the configuration field `tables`. -This will happen if the query specifies a table that doesn't appear in the configuration. -This can also happen if the configuration specifies `*` to fetch all tables while the advanced sync rule requests for only a subset of tables. -==== - -[discrete#connectors-postgresql-known-issues] -==== Known issues - -There are no known issues for this connector. -Refer to <> for a list of known issues for all connectors. - -[discrete#connectors-postgresql-troubleshooting] -==== Troubleshooting - -See <>. - -[discrete#connectors-postgresql-security] -==== Security - -See <>. - -// Closing the collapsible section -=============== - -[discrete#es-connectors-postgresql-connector-client-reference] -=== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-postgresql-client-availability-prerequisites] -==== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. -To use this connector, satisfy all <>. - -[discrete#es-connectors-postgresql-create-connector-client] -==== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-postgresql-client-usage] -==== Usage - -To use this connector as a *self-managed connector*, see <>. 
-[TIP] -==== -Users must set `track_commit_timestamp` to `on`. -To do this, run `ALTER SYSTEM SET track_commit_timestamp = on;` in PostgreSQL server. -==== - -For additional operations, see <>. - -[NOTE] -==== -For an end-to-end example of the self-managed connector workflow, see <>. -==== - -[discrete#es-connectors-postgresql-client-compatibility] -==== Compatibility - -PostgreSQL versions 11 to 15 are compatible with Elastic connector frameworks. - -[discrete#es-connectors-postgresql-client-configuration] -==== Configuration - -[TIP] -==== -When using the <>, initially these fields will use the default configuration set in the https://github.com/elastic/connectors-python/blob/{branch}/connectors/sources/postgresql.py[connector source code^]. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, users will be able to update these values in Kibana. -==== - -Set the following configuration fields: - -`host`:: -The server host address where the PostgreSQL instance is hosted. -Examples: -+ -* `192.158.1.38` -* `demo.instance.demo-region.demo.service.com` - -`port`:: -The port where the PostgreSQL instance is hosted. -Examples: -+ -* `5432` -* `9090` - -`username`:: -The username of the PostgreSQL account. - -`password`:: -The password of the PostgreSQL account. - -`database`:: -Name of the PostgreSQL database. -Examples: -+ -* `employee_database` -* `customer_database` - -`schema`:: -The schema of the PostgreSQL database. - -`tables`:: -A list of tables separated by commas. -The PostgreSQL connector will fetch data from all tables present in the configured database, if the value is `*` . -Default value is `*`. -Examples: -+ -* `table_1, table_2` -* `*` -+ -[WARNING] -==== -This field can be bypassed when using advanced sync rules. -==== - -`ssl_enabled`:: -Whether SSL verification will be enabled. -Default value is `True`. - -`ssl_ca`:: -Content of SSL certificate (if SSL is enabled). -If SSL is disabled, the `ssl_ca` value will be ignored.
-+ -.*Expand* to see an example certificate -[%collapsible] -==== -``` ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -AlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHROb2RlMDExFjAUBgNV -BAsTDURlZmF1bHRDZWxsMDExGTAXBgNVBAsTEFJvb3QgQ2VydGlmaWNhdGUxEjAQ -BgNVBAMTCWxvY2FsaG9zdDAeFw0yMTEyMTQyMjA3MTZaFw0yMjEyMTQyMjA3MTZa -MF8xCzAJBgNVBAYTAlVTMQwwCgYDVQQKEwNJQk0xFjAUBgNVBAsTDURlZmF1bHRO -b2RlMDExFjAUBgNVBAsTDURlZmF1bHRDZWxsMDExEjAQBgNVBAMTCWxvY2FsaG9z -dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMv5HCsJZIpI5zCy+jXV -z6lmzNc9UcVSEEHn86h6zT6pxuY90TYeAhlZ9hZ+SCKn4OQ4GoDRZhLPTkYDt+wW -CV3NTIy9uCGUSJ6xjCKoxClJmgSQdg5m4HzwfY4ofoEZ5iZQ0Zmt62jGRWc0zuxj -hegnM+eO2reBJYu6Ypa9RPJdYJsmn1RNnC74IDY8Y95qn+WZj//UALCpYfX41hko -i7TWD9GKQO8SBmAxhjCDifOxVBokoxYrNdzESl0LXvnzEadeZTd9BfUtTaBHhx6t -njqqCPrbTY+3jAbZFd4RiERPnhLVKMytw5ot506BhPrUtpr2lusbN5svNXjuLeea -MMUCAwEAAaOBoDCBnTATBgNVHSMEDDAKgAhOatpLwvJFqjAdBgNVHSUEFjAUBggr -BgEFBQcDAQYIKwYBBQUHAwIwVAYDVR0RBE0wS4E+UHJvZmlsZVVVSUQ6QXBwU3J2 -MDEtQkFTRS05MDkzMzJjMC1iNmFiLTQ2OTMtYWI5NC01Mjc1ZDI1MmFmNDiCCWxv -Y2FsaG9zdDARBgNVHQ4ECgQITzqhA5sO8O4wDQYJKoZIhvcNAQELBQADggEBAKR0 -gY/BM69S6BDyWp5dxcpmZ9FS783FBbdUXjVtTkQno+oYURDrhCdsfTLYtqUlP4J4 -CHoskP+MwJjRIoKhPVQMv14Q4VC2J9coYXnePhFjE+6MaZbTjq9WaekGrpKkMaQA -iQt5b67jo7y63CZKIo9yBvs7sxODQzDn3wZwyux2vPegXSaTHR/rop/s/mPk3YTS -hQprs/IVtPoWU4/TsDN3gIlrAYGbcs29CAt5q9MfzkMmKsuDkTZD0ry42VjxjAmk -xw23l/k8RoD1wRWaDVbgpjwSzt+kl+vJE/ip2w3h69eEZ9wbo6scRO5lCO2JM4Pr -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- -``` -==== - -[discrete#es-connectors-postgresql-client-docker] -==== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-postgresql-client-documents-syncs] -==== Documents and syncs - -* Tables must be owned by a PostgreSQL user. -* Tables with no primary key defined are skipped. -* To fetch the last updated time in PostgreSQL, `track_commit_timestamp` must be set to `on`. -Otherwise, all data will be indexed in every sync. - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. -**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#es-connectors-postgresql-client-sync-rules] -==== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-postgresql-client-sync-rules-advanced] -===== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -[discrete#es-connectors-postgresql-client-sync-rules-advanced-example-data] -====== Example data - -Here is some example data that will be used in the following examples.
- -[discrete#es-connectors-postgresql-client-sync-rules-advanced-example-data-1] -*`employee` table* - -[cols="3*", options="header"] -|=== -| emp_id | name | age -| 3 | John | 28 -| 10 | Jane | 35 -| 14 | Alex | 22 -|=== - -[discrete#es-connectors-postgresql-client-sync-rules-advanced-example-2] -*`customer` table* - -[cols="3*", options="header"] -|=== -| c_id | name | age -| 2 | Elm | 24 -| 6 | Pine | 30 -| 9 | Oak | 34 -|=== - -[discrete#es-connectors-postgresql-client-sync-rules-advanced-examples] -====== Advanced sync rules examples - -[discrete#es-connectors-postgresql-client-sync-rules-advanced-examples-1] -*Multiple table queries* - -[source,js] ----- -[ - { - "tables": [ - "employee" - ], - "query": "SELECT * FROM employee" - }, - { - "tables": [ - "customer" - ], - "query": "SELECT * FROM customer" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-postgresql-client-sync-rules-advanced-examples-1-id-columns] -*Multiple table queries with `id_columns`* - -In 8.15.0, we added a new optional `id_columns` field in our advanced sync rules for the PostgreSQL connector. -Use the `id_columns` field to ingest tables which do not have a primary key. Include the names of unique fields so that the connector can use them to generate unique IDs for documents. - -[source,js] ----- -[ - { - "tables": [ - "employee" - ], - "query": "SELECT * FROM employee", - "id_columns": ["emp_id"] - }, - { - "tables": [ - "customer" - ], - "query": "SELECT * FROM customer", - "id_columns": ["c_id"] - } -] ----- -// NOTCONSOLE - -This example uses the `id_columns` field to specify the unique fields `emp_id` and `c_id` for the `employee` and `customer` tables, respectively. - -[discrete#es-connectors-postgresql-client-sync-rules-advanced-examples-2] -*Filtering data with `WHERE` clause* - -[source,js] ----- -[ - { - "tables": ["employee"], - "query": "SELECT * FROM employee WHERE emp_id > 5" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-postgresql-client-sync-rules-advanced-examples-3] -*`JOIN` operations* - -[source,js] ----- -[ - { - "tables": ["employee", "customer"], - "query": "SELECT * FROM employee INNER JOIN customer ON employee.emp_id = customer.c_id" - } -] ----- -// NOTCONSOLE - -[WARNING] -==== -When using advanced rules, a query can bypass the configuration field `tables`. -This will happen if the query specifies a table that doesn't appear in the configuration. -This can also happen if the configuration specifies `*` to fetch all tables while the advanced sync rule requests for only a subset of tables. -==== - -[discrete#es-connectors-postgresql-client-client-operations-testing] -==== End-to-end testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the PostgreSQL connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=postgresql ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=postgresql DATA_SIZE=small ----- - -[discrete#es-connectors-postgresql-client-known-issues] -==== Known issues - -There are no known issues for this connector. -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-postgresql-client-troubleshooting] -==== Troubleshooting - -See <>. - -[discrete#es-connectors-postgresql-client-security] -==== Security - -See <>. 
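As noted in the documents and syncs section, incremental updates depend on `track_commit_timestamp` being `on`; otherwise every sync re-indexes all rows. The sketch below is one way to verify the setting and inspect commit timestamps, assuming the `psycopg2` driver and placeholder connection details; it is not part of the connector itself.

[source,python]
----
import psycopg2

# Placeholder connection details -- replace with your own host, credentials, and database.
conn = psycopg2.connect(
    host="192.158.1.38", port=5432, user="admin", password="changeme", dbname="employee_database"
)
with conn, conn.cursor() as cur:
    cur.execute("SHOW track_commit_timestamp;")
    print("track_commit_timestamp =", cur.fetchone()[0])  # expect 'on'

    # pg_xact_commit_timestamp() raises an error if the setting is disabled.
    cur.execute("SELECT emp_id, pg_xact_commit_timestamp(xmin) FROM employee LIMIT 5;")
    for emp_id, committed_at in cur.fetchall():
        print(emp_id, committed_at)
conn.close()
----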
- -// Closing the collapsible section -=============== \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-redis.asciidoc b/docs/reference/connector/docs/connectors-redis.asciidoc deleted file mode 100644 index 7aad7b0b41497..0000000000000 --- a/docs/reference/connector/docs/connectors-redis.asciidoc +++ /dev/null @@ -1,293 +0,0 @@ -[#es-connectors-redis] -=== Redis connector reference -++++ -Redis -++++ - -// Attributes (AKA variables) used in this file -:service-name: Redis -:service-name-stub: redis - -The Redis connector is built with the Elastic connectors Python framework and is available as a self-managed <>. -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -[discrete#es-connectors-redis-connector-availability-and-prerequisites] -==== Availability and prerequisites - -This connector was introduced in Elastic *8.13.0*, available as a *self-managed* self-managed connector. - -To use this connector, satisfy all <>. -Importantly, you must deploy the connectors service on your own infrastructure. -You have two deployment options: - -* <>. Use this option if you're comfortable working with Python and want to iterate quickly locally. -* <>. Use this option if you want to deploy the connectors to a server, or use a container orchestration platform. - -[NOTE] -==== -This connector is in *technical preview* and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Technical preview features are not subject to the support SLA of official GA features. -==== - -[discrete#es-connectors-redis-connector-usage] -==== Usage - -To set up this connector in the UI, select the *Redis* tile when creating a new connector under *Search -> Connectors*. - -For additional operations, see <>. - -[discrete#es-connectors-redis-connector-docker] -==== Deploy with Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-redis-connector-configuration] -==== Configuration - -`host` (required):: -The IP of your Redis server/cloud. Example: - -* `127.0.0.1` -* `redis-12345.us-east-1.ec2.cloud.redislabs.com` - -`port` (required):: -Port where the Redis server/cloud instance is hosted. Example: - -* `6379` - -`username` (optional):: -Username for your Redis server/cloud. Example: - -* `default` - -`password` (optional):: -Password for your Redis server/cloud instance. Example: - -* `changeme` - -`database` (required):: -List of database index for your Redis server/cloud. * will fetch data from all databases. Example: - -* `0,1,2` -* `*` -+ -[NOTE] -==== -This field is ignored when using advanced sync rules. -==== - -`ssl_enabled`:: -Toggle to use SSL/TLS. Disabled by default. - -`mutual_tls_enabled`:: -Toggle to use secure mutual SSL/TLS. Ensure that your Redis deployment supports mutual SSL/TLS connections. Disabled by default. Depends on `ssl_enabled`. - -`tls_certfile`:: -Specifies the certificate from the Certificate Authority. The value of the certificate is used to validate the certificate presented by the Redis instance. Depends on `mutual_tls_enabled`. - -`tls_keyfile`:: -Specifies the client private key. The value of the key is used to validate the connection in the Redis instance. -Depends on `mutual_tls_enabled`. 
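The fields above map closely onto the options of the standard `redis` Python client, which can be handy for checking connectivity with equivalent settings before configuring the connector. This is only an illustration: the host, credentials, and certificate paths are placeholders, and the parameter mapping is an assumption rather than the connector's own code.

[source,python]
----
import redis

# Placeholder values -- align these with the configuration fields described above.
client = redis.Redis(
    host="redis-12345.us-east-1.ec2.cloud.redislabs.com",
    port=6379,
    username="default",
    password="changeme",
    db=0,
    ssl=True,                    # roughly `ssl_enabled`
    ssl_ca_certs="ca.pem",       # CA material, comparable to `tls_certfile`
    ssl_certfile="client.crt",   # client certificate used for mutual TLS
    ssl_keyfile="client.key",    # client private key, comparable to `tls_keyfile`
)
print(client.ping())  # True if the connection and credentials are accepted
----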
- -[discrete#es-connectors-redis-connector-documents-and-syncs] -==== Documents and syncs - -The connector syncs the following objects and entities: - -* KEYS and VALUES of every database index - - -[NOTE] -==== -* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to the relevant Elasticsearch index. -==== - -[discrete#es-connectors-redis-connector-sync-rules] -==== Sync rules -<> are identical for all connectors and are available by default. - - -[discrete#es-connectors-redis-connector-advanced-sync-rules] -==== Advanced Sync Rules - -<> are defined through a source-specific DSL JSON snippet. - -Use advanced sync rules to filter data at the Redis source, without needing to index all data into Elasticsearch. - -They take the following parameters: - -1. `database`: Specify the Redis database index as an integer value. -2. `key_pattern`: Pattern for finding keys in Redis. -3. `type`: Key type for the Redis. -+ -Supported values: - -* `HASH` -* `LIST` -* `SET` -* `STREAM` -* `STRING` -* `ZSET` - -[NOTE] -==== -Provide at least one of the following: `key_pattern` or `type`, or both. -==== - -[discrete#es-connectors-redis-connector-advanced-sync-rules-examples] -===== Advanced sync rules examples - -[discrete#es-connectors-redis-connector-advanced-sync-rules-example-1] -====== Example 1 - -*Fetch database records where keys start with `alpha`*: - -[source,js] ----- -[ -  { -    "database": 0, -    "key_pattern": "alpha*" -  } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-redis-connector-advanced-sync-rules-example-2] -====== Example 2 - -*Fetch database records with exact match by specifying the full key name:* - -[source,js] ----- -[ -  { -    "database": 0, -    "key_pattern": "alpha" -  } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-redis-connector-advanced-sync-rules-example-3] -====== Example 3 - -*Fetch database records where keys start with `test1`, `test2` or `test3`:* - -[source,js] ----- -[ -  { -    "database": 0, -    "key_pattern": "test[123]" -  } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-redis-connector-advanced-sync-rules-example-4] -====== Example 4 - -*Exclude database records where keys start with `test1`, `test2` or `test3`:* - -[source,js] ----- -[ -  { -    "database": 0, -    "key_pattern": "test[^123]" -  } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-redis-connector-advanced-sync-rules-example-5] -====== Example 5 - -*Fetch all database records:* - -[source,js] ----- -[ -  { -    "database": 0, -    "key_pattern": "*" -  } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-redis-connector-advanced-sync-rules-example-6] -====== Example 6 - -*Fetch all database records where type is `SET`:* - -[source,js] ----- -[ -  { -    "database": 0, -    "key_pattern": "*", -    "type": "SET" -  } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-redis-connector-advanced-sync-rules-example-7] -====== Example 7 - -*Fetch database records where type is `SET`*: - -[source,js] ----- -[ -  { -    "database": 0, -    "type": "SET" -  } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-redis-connector-connector-client-operations] -==== Connector Client operations - -[discrete#es-connectors-redis-connector-end-to-end-testing] -===== End-to-end Testing - -The connector framework enables operators to run functional tests against a real data source, using Docker Compose. -You don't need a running Elasticsearch instance or Redis source to run this test. - -Refer to <> for more details.
- -To perform E2E testing for the Redis connector, run the following command: - -```shell -$ make ftest NAME=redis -``` -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=redis DATA_SIZE=small ----- - -By default, `DATA_SIZE=MEDIUM`. - -[discrete#es-connectors-redis-connector-known-issues] -==== Known issues - -* The last modified time is unavailable when retrieving keys/values from the Redis database. -As a result, *all objects* are indexed each time an advanced sync rule query is executed. - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-redis-connector-troubleshooting] -==== Troubleshooting - -See <>. - -[discrete#es-connectors-redis-connector-security] -==== Security - -See <>. \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-refs.asciidoc b/docs/reference/connector/docs/connectors-refs.asciidoc deleted file mode 100644 index 51580410d5bc5..0000000000000 --- a/docs/reference/connector/docs/connectors-refs.asciidoc +++ /dev/null @@ -1,64 +0,0 @@ -[#es-connectors-refs] -== Connectors references - -The following pages contain the reference documentation for each individual connector: - -include::_connectors-list-clients.asciidoc[] - -include::connectors-azure-blob.asciidoc[] - -include::connectors-box.asciidoc[] - -include::connectors-confluence.asciidoc[] - -include::connectors-dropbox.asciidoc[] - -include::connectors-github.asciidoc[] - -include::connectors-gmail.asciidoc[] - -include::connectors-google-cloud.asciidoc[] - -include::connectors-google-drive.asciidoc[] - -include::connectors-graphql.asciidoc[] - -include::connectors-jira.asciidoc[] - -include::connectors-ms-sql.asciidoc[] - -include::connectors-mongodb.asciidoc[] - -include::connectors-mysql.asciidoc[] - -include::connectors-network-drive.asciidoc[] - -include::connectors-notion.asciidoc[] - -include::connectors-onedrive.asciidoc[] - -include::connectors-opentext-documentum.asciidoc[] - -include::connectors-oracle.asciidoc[] - -include::connectors-outlook.asciidoc[] - -include::connectors-postgresql.asciidoc[] - -include::connectors-redis.asciidoc[] - -include::connectors-s3.asciidoc[] - -include::connectors-salesforce.asciidoc[] - -include::connectors-servicenow.asciidoc[] - -include::connectors-sharepoint-online.asciidoc[] - -include::connectors-sharepoint.asciidoc[] - -include::connectors-slack.asciidoc[] - -include::connectors-teams.asciidoc[] - -include::connectors-zoom.asciidoc[] \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-release-notes.asciidoc b/docs/reference/connector/docs/connectors-release-notes.asciidoc deleted file mode 100644 index ff3d859e1a888..0000000000000 --- a/docs/reference/connector/docs/connectors-release-notes.asciidoc +++ /dev/null @@ -1,84 +0,0 @@ -[#es-connectors-release-notes] -== Connector release notes -++++ -Release notes -++++ - -[NOTE] -==== -Prior to version *8.16.0*, the connector release notes were published as part of the {enterprise-search-ref}/changelog.html[Enterprise Search documentation]. -==== - -[discrete] -[[es-connectors-release-notes-8-17-0]] -=== 8.17.0 - -No notable changes in this release. - -[discrete] -[[es-connectors-release-notes-8-16-1]] -=== 8.16.1 - -[discrete] -[[es-connectors-release-notes-8-16-1-bug-fixes]] -==== Bug fixes - -* Fixed a bug in the Outlook Connector where having deactivated users could cause the sync to fail. -See https://github.com/elastic/connectors/pull/2967[*PR 2967*]. 
-* Fixed a bug where the Confluence connector was not downloading some blog post documents due to unexpected response format. -See https://github.com/elastic/connectors/pull/2984[*PR 2984*]. - -[discrete] -[[es-connectors-release-notes-8-16-0]] -=== 8.16.0 - -[discrete] -[[es-connectors-release-notes-deprecation-notice]] -==== Deprecation notices - -* *Direct index access for connectors and sync jobs* -+ -IMPORTANT: Directly accessing connector and sync job state through `.elastic-connectors*` indices is deprecated, and will be disallowed entirely in a future release. - -* Instead, the Elasticsearch Connector APIs should be used. Connectors framework code now uses the <> by default. -See https://github.com/elastic/connectors/pull/2884[*PR 2902*]. - -* *Docker `enterprise-search` namespace deprecation* -+ -IMPORTANT: The `enterprise-search` Docker namespace is deprecated and will be discontinued in a future release. -+ -Starting in `8.16.0`, Docker images are being transitioned to the new `integrations` namespace, which will become the sole location for future releases. This affects the https://github.com/elastic/connectors[Elastic Connectors] and https://github.com/elastic/data-extraction-service[Elastic Data Extraction Service]. -+ -During this transition period, images are published to both namespaces: -+ -** *Example*: -+ -Deprecated namespace:: -`docker.elastic.co/enterprise-search/elastic-connectors:v8.16.0` -+ -New namespace:: -`docker.elastic.co/integrations/elastic-connectors:v8.16.0` -+ -Users should migrate to the new `integrations` namespace as soon as possible to ensure continued access to future releases. - -[discrete] -[[es-connectors-release-notes-8-16-0-enhancements]] -==== Enhancements - -* Docker images now use Chainguard's Wolfi base image (`docker.elastic.co/wolfi/jdk:openjdk-11-dev`), replacing the previous `ubuntu:focal` base. - -* The Sharepoint Online connector now works with the `Sites.Selected` permission instead of the broader permission `Sites.Read.All`. -See https://github.com/elastic/connectors/pull/2762[*PR 2762*]. - -* Starting in 8.16.0, connectors will start using proper SEMVER, with `MAJOR.MINOR.PATCH`, which aligns with Elasticsearch/Kibana versions. This drops the previous `.BUILD` suffix, which we used to release connectors between Elastic stack releases. Going forward, these inter-stack-release releases will be suffixed instead with `+`, aligning with Elastic Agent and conforming to SEMVER. -See https://github.com/elastic/connectors/pull/2749[*PR 2749*]. - -* Connector logs now use UTC timestamps, instead of machine-local timestamps. This only impacts logging output. -See https://github.com/elastic/connectors/pull/2695[*PR 2695*]. - -[discrete] -[[es-connectors-release-notes-8-16-0-bug-fixes]] -==== Bug fixes - -* The Dropbox connector now fetches the files from team shared folders. -See https://github.com/elastic/connectors/pull/2718[*PR 2718*]. diff --git a/docs/reference/connector/docs/connectors-run-from-docker.asciidoc b/docs/reference/connector/docs/connectors-run-from-docker.asciidoc deleted file mode 100644 index 88616e374963e..0000000000000 --- a/docs/reference/connector/docs/connectors-run-from-docker.asciidoc +++ /dev/null @@ -1,95 +0,0 @@ -[#es-connectors-run-from-docker] -=== Running from a Docker container - -[TIP] -==== -Use our <> to quickly get started with a full Elastic Stack deployment using Connectors. 
-==== - -Instead of running the Connectors Service from source, you can use the official Docker image to run the service in a container. - -As a prerequisite, you need to have an Elasticsearch and Kibana instance running. -From inside your Kibana UI, You will need to <> in the same manner as if you are running the service from source. - -When you are ready to run Connectors: - -*Step 1: Download sample configuration file* - -Download the sample configuration file. -You can either download it manually or run the following command: - -[source,sh] ----- -curl https://raw.githubusercontent.com/elastic/connectors/main/config.yml.example --output /connectors-config/config.yml ----- -// NOTCONSOLE - -Don't forget to change the `--output` argument value to the path where you want to save the `config.yml` file on your local system. -But keep note of where you wrote this file, as it is required in the `docker run` step below. - -*Step 2: Update the configuration file for your self-managed connector* - -Update the configuration file with the following settings to match your environment: - -* `elasticsearch.host` -* `elasticsearch.api_key` -* `connectors` - -If you're running the connector service against a Dockerized version of Elasticsearch and Kibana, your config file will look like this: - -[source,yaml,subs="attributes"] ----- -# When connecting to your cloud deployment you should edit the host value -elasticsearch.host: http://host.docker.internal:9200 -elasticsearch.api_key: - -connectors: - - - connector_id: - service_type: {service-name} # sharepoint_online (example) - api_key: # Optional. If not provided, the connector will use the elasticsearch.api_key instead - ----- - -*Step 3: Run the Docker image* - -Run the Docker image with the Connector Service using the following command: - -[source,sh,subs="attributes"] ----- -docker run \ --v "/connectors-config:/config" \ # NOTE: you must change this path to match where the config.yml is located ---rm \ ---tty -i \ ---network host \ -docker.elastic.co/integrations/elastic-connectors:{version}.0 \ -/app/bin/elastic-ingest \ --c /config/config.yml ----- - -[TIP] -==== -For unreleased versions, append the `-SNAPSHOT` suffix to the version number. -For example, `docker.elastic.co/integrations/elastic-connectors:8.14.0.0-SNAPSHOT`. -==== - -Find all available Docker images in the https://www.docker.elastic.co/r/integrations/elastic-connectors[official registry]. - -[discrete#es-build-connector-finalizes-kibana] -==== Enter data source details in Kibana - -Once the connector service is running, it's time to head back to the Kibana UI to finalize the connector configuration. -In this step, you need to add the specific connection details about your data source instance, like URL, authorization credentials, etc. -As mentioned above, these details will vary based on the third-party data source you’re connecting to. - -For example, the PostgreSQL connector requires the following details: - -* *Host* -* *Port* -* *Username* -* *Password* -* *Database* -* *Comma-separated list of tables* - -You're now ready to run a sync. -Select the *Full sync* button in the Kibana UI to start ingesting documents into Elasticsearch. 
diff --git a/docs/reference/connector/docs/connectors-run-from-source.asciidoc b/docs/reference/connector/docs/connectors-run-from-source.asciidoc deleted file mode 100644 index a5d1dc31074f2..0000000000000 --- a/docs/reference/connector/docs/connectors-run-from-source.asciidoc +++ /dev/null @@ -1,103 +0,0 @@ -[#es-connectors-run-from-source] -=== Running from the source code - -The basic way to run connectors is to clone the repository and run the code locally. -This is a good option if you are comfortable with Python and want to iterate quickly. - -[discrete#es-connectors-run-from-source-setup-kibana] -==== Initial setup in Kibana - -Follow the *Connector* workflow in the Kibana UI to select the *Connector* ingestion method. - -Next, complete these steps: - -1. Choose which third-party service you’d like to use by selecting a *data source*. -2. Create and name a new *Elasticsearch index*. -3. Generate a new *API key* and save it somewhere safe. -4. Name your connector and provide an optional description -5. *Convert* managed connector to a self-managed connector (_Only applicable if connector is also available natively_). This action is irreversible. -6. Copy the configuration block from the example shown on the screen. You’ll use this in a later step: -+ -[source,yaml] ----- -# ... -connectors: - - connector_id: - api_key: # Scoped API key for this connector (optional). If not specified, the top-level `elasticsearch.api_key` value is used. - service_type: gmail # example ----- - -[discrete#es-connectors-run-from-source-source-clone] -====== Clone the repository and edit `config.yml` - -Once you've created an index, and entered the access details for your data source, you're ready to deploy the connector service. - -First, you need to clone the `elastic/connectors` repository. - -Follow these steps: - -* Clone or fork the `connectors` repository locally with the following command: `git clone https://github.com/elastic/connectors`. -* Run `make config` to generate your initial `config.yml` file -* Open the `config.yml` configuration file in your editor of choice. -* Replace the values for `host` (your Elasticsearch endpoint), `api_key`, `connector_id`, and `service_type`. -+ -.*Expand* to see an example `config.yml` file -[%collapsible] -==== -Replace the values for `api_key`, `connector_id`, and `service_type` with the values you copied earlier. -[source,yaml] ----- -elasticsearch: - api_key: # Used to write data to .elastic-connectors and .elastic-connectors-sync-jobs - # Any connectors without a specific `api_key` value will default to using this key -connectors: - - connector_id: 1234 - api_key: # Used to write data to the `search-*` index associated with connector 1234 - # You may have multiple connectors in your config file! - - connector_id: 5678 - api_key: # Used to write data to the `search-*` index associated with connector 5678 - - connector_id: abcd # No explicit api key specified, so this connector will use ----- - -[discrete#es-connectors-run-from-source-api-keys] -[NOTE] -===== -**API keys for connectors** - -You can configure multiple connectors in your `config.yml` file. - -The Kibana UI enables you to create API keys that are scoped to a specific index/connector. -If you don't create an API key for a specific connector, the top-level `elasticsearch.api_key` or `elasticsearch.username:elasticsearch.password` value is used. 
- -If these top-level Elasticsearch credentials are not sufficiently privileged to write to individual connector indices, you'll need to create these additional, scoped API keys. - -Use the example above as a guide. -===== -==== - -[discrete#es-connectors-run-from-source-run] -====== Run the connector service - -[NOTE] -==== -You need Python version `3.10` or `3.11` to run the connectors service from source. -==== - -Once you've configured the connector code, you can run the connector service. - -In your terminal or IDE: - -. `cd` into the root of your `connectors` clone/fork. -. Run the following commands to compile and run the connector service: -+ -[source,shell] ----- -make install -make run ----- - -The connector service should now be running. -The UI will let you know that the connector has successfully connected to your Elasticsearch instance. - -As a reminder, here we're working locally. -In a production setup, you'll deploy the connector service to your own infrastructure. diff --git a/docs/reference/connector/docs/connectors-s3.asciidoc b/docs/reference/connector/docs/connectors-s3.asciidoc deleted file mode 100644 index 90c070f7b8044..0000000000000 --- a/docs/reference/connector/docs/connectors-s3.asciidoc +++ /dev/null @@ -1,437 +0,0 @@ -[#es-connectors-s3] -=== Elastic S3 connector reference -++++ -S3 -++++ -// Attributes used in this file: -:service-name: Amazon S3 -:service-name-stub: s3 - -The _Elastic S3 connector_ is a <> for https://aws.amazon.com/s3/[Amazon S3^] data sources. - - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-s3-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-s3-prerequisites] -===== Availability and prerequisites - -This connector is available natively in Elastic Cloud as of version *8.12.0*. -To use this connector, satisfy all <>. - -[discrete#es-connectors-s3-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-s3-usage] -===== Usage - -To use this managed connector, see <>. - -For additional operations, see <>. - -S3 users will also need to <> - -[discrete#es-connectors-s3-usage-create-iam] -====== Create an IAM identity - -Users need to create an IAM identity to use this connector as a *self-managed connector*. -Refer to https://docs.aws.amazon.com/IAM/latest/UserGuide/getting-set-up.html[the AWS documentation^]. - -The https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html[policy^] associated with the IAM identity must have the following *AWS permissions*: - -* `ListAllMyBuckets` -* `ListBucket` -* `GetBucketLocation` -* `GetObject` - -[discrete#es-connectors-s3-compatibility] -===== Compatibility - -Currently the connector does not support S3-compatible vendors. - -[discrete#es-connectors-s3-configuration] -===== Configuration - -The following configuration fields are required to *set up* the connector: - -AWS Buckets:: -List of S3 bucket names. -`*` will fetch data from all buckets. -Examples: -+ -* `testbucket, prodbucket` -* `testbucket` -* `*` - -[NOTE] -==== -This field is ignored when using advanced sync rules. -==== - -AWS Access Key ID:: -Access Key ID for the AWS identity that will be used for bucket access. 
- -AWS Secret Key:: -Secret Access Key for the AWS identity that will be used for bucket access. - -[discrete#es-connectors-s3-documents-syncs] -===== Documents and syncs - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permissions are not synced. -**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#es-connectors-s3-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-s3-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -Use advanced sync rules to filter data to be fetched from Amazon S3 buckets. -They take the following parameters: - -1. `bucket`: S3 bucket the rule applies to. -2. `extension` (optional): Lists which file types to sync. Defaults to syncing all types. -3. `prefix` (optional): String of prefix characters. -The connector will fetch file and folder data that matches the string. -Defaults to `""` (syncs all bucket objects). - -[discrete#es-connectors-s3-sync-rules-advanced-examples] -*Advanced sync rules examples* - -*Fetching files and folders recursively by prefix* - -*Example*: Fetch files/folders in `folder1/docs`. - -[source,js] ----- -[ - { - "bucket": "bucket1", - "prefix": "folder1/docs" - } - -] ----- -// NOTCONSOLE - -*Example*: Fetch files/folder starting with `folder1`. - -[source,js] ----- -[ - { - "bucket": "bucket2", - "prefix": "folder1" - } -] ----- -// NOTCONSOLE - -*Fetching files and folders by specifying extensions* - -*Example*: Fetch all objects which start with `abc` and then filter using file extensions. - -[source,js] ----- -[ - { - "bucket": "bucket2", - "prefix": "abc", - "extension": [".txt", ".png"] - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-s3-content-extraction] -===== Content extraction - -See <>. - -[discrete#es-connectors-s3-known-issues] -===== Known issues - -There are no known issues for this connector. - -See <> for any issues affecting all connectors. - -[discrete#es-connectors-s3-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-s3-security] -===== Security - -See <>. - -[discrete#es-connectors-s3-source] -===== Framework and source - -This connector is built with the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/s3.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-s3-connector-client-reference] -==== *Self-managed connector reference* - -.View *self-managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-s3-client-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. -This self-managed connector is compatible with Elastic versions *8.6.0+*. -To use this connector, satisfy all <>. 
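Before configuring the connector it can be useful to confirm that the IAM identity created for it really has the permissions listed in the IAM identity steps. The sketch below is a hypothetical check using `boto3` with a placeholder bucket name; it assumes the IAM identity's credentials are already available to the AWS SDK (for example via environment variables).

[source,python]
----
import boto3

# Placeholder bucket name -- use one of the buckets you plan to configure for the connector.
bucket = "testbucket"
s3 = boto3.client("s3")

# Exercise the four permissions the connector's IAM policy requires.
s3.list_buckets()                                        # ListAllMyBuckets
s3.get_bucket_location(Bucket=bucket)                    # GetBucketLocation
listing = s3.list_objects_v2(Bucket=bucket, MaxKeys=1)   # ListBucket
if listing.get("Contents"):
    s3.get_object(Bucket=bucket, Key=listing["Contents"][0]["Key"])  # GetObject
print("IAM identity can reach", bucket)
----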
- -[discrete#es-connectors-s3-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-s3-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, see <>. - -For additional operations, see <>. - -S3 users will also need to <> - -[discrete#es-connectors-s3-client-usage-create-iam] -====== Create an IAM identity - -Users need to create an IAM identity to use this connector as a *self-managed connector*. -Refer to https://docs.aws.amazon.com/IAM/latest/UserGuide/getting-set-up.html[the AWS documentation^]. - -The https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html[policy^] associated with the IAM identity must have the following *AWS permissions*: - -* `ListAllMyBuckets` -* `ListBucket` -* `GetBucketLocation` -* `GetObject` - -[discrete#es-connectors-s3-client-compatibility] -===== Compatibility - -Currently the connector does not support S3-compatible vendors. - -[discrete#es-connectors-s3-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, these fields will use the default configuration set in the https://github.com/elastic/connectors/blob/a5976d20cd8277ae46511f7176662afc889e56ec/connectors/sources/s3.py#L231-L258[connector source code^]. -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required to *set up* the connector: - -`buckets`:: -List of S3 bucket names. -`*` will fetch data from all buckets. -Examples: -+ -* `testbucket, prodbucket` -* `testbucket` -* `*` - -[NOTE] -==== -This field is ignored when using advanced sync rules. -==== - -`aws_access_key_id`:: -Access Key ID for the AWS identity that will be used for bucket access. - -`aws_secret_access_key`:: -Secret Access Key for the AWS identity that will be used for bucket access. - -`read_timeout`:: -The `read_timeout` for Amazon S3. -Default value is `90`. - -`connect_timeout`:: -Connection timeout for crawling S3. -Default value is `90`. - -`max_attempts`:: -Maximum retry attempts. -Default value is `5`. - -`page_size`:: -Page size for iterating bucket objects in Amazon S3. -Default value is `100`. - -[discrete#es-connectors-s3-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-s3-client-documents-syncs] -===== Documents and syncs - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. -* Permissions are not synced. -**All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#es-connectors-s3-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-s3-client-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -Use advanced sync rules to filter data to be fetched from Amazon S3 buckets. -They take the following parameters: - -1. `bucket`: S3 bucket the rule applies to. -2. `extension` (optional): Lists which file types to sync. Defaults to syncing all types. -3. `prefix` (optional): String of prefix characters. 
-The connector will fetch file and folder data that matches the string. -Defaults to `""` (syncs all bucket objects). - -[discrete#es-connectors-s3-client-sync-rules-advanced-examples] -*Advanced sync rules examples* - -*Fetching files and folders recursively by prefix* - -*Example*: Fetch files/folders in `folder1/docs`. - -[source,js] ----- -[ - { - "bucket": "bucket1", - "prefix": "folder1/docs" - } - -] ----- -// NOTCONSOLE - -*Example*: Fetch files/folder starting with `folder1`. - -[source,js] ----- -[ - { - "bucket": "bucket2", - "prefix": "folder1" - } -] ----- -// NOTCONSOLE - -*Fetching files and folders by specifying extensions* - -*Example*: Fetch all objects which start with `abc` and then filter using file extensions. - -[source,js] ----- -[ - { - "bucket": "bucket2", - "prefix": "abc", - "extension": [".txt", ".png"] - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-s3-client-content-extraction] -===== Content extraction - -See <>. - -[discrete#es-connectors-s3-client-testing] -===== End-to-end testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To execute a functional test for the Amazon S3 *self-managed connector*, run the following command: - -[source,shell] ----- -make ftest NAME=s3 ----- - -By default, this will use a medium-sized dataset. -To make the test faster add the `DATA_SIZE=small` argument: - -[source,shell] ----- -make ftest NAME=s3 DATA_SIZE=small ----- - -[discrete#es-connectors-s3-client-known-issues] -===== Known issues - -There are no known issues for this connector. - -See <> for any issues affecting all connectors. - -[discrete#es-connectors-s3-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-s3-client-security] -===== Security - -See <>. - -[discrete#es-connectors-s3-client-source] -===== Framework and source - -This connector is built with the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/s3.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-salesforce.asciidoc b/docs/reference/connector/docs/connectors-salesforce.asciidoc deleted file mode 100644 index f5c5512ad5cc4..0000000000000 --- a/docs/reference/connector/docs/connectors-salesforce.asciidoc +++ /dev/null @@ -1,813 +0,0 @@ -[#es-connectors-salesforce] -=== Elastic Salesforce connector reference -++++ -Salesforce -++++ -// Attributes used in this file -:service-name: Salesforce -:service-name-stub: salesforce - -The _Elastic Salesforce connector_ is a <> for https://www.salesforce.com/[Salesforce^] data sources. - - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-salesforce-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-salesforce-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* in Elastic Cloud since *8.12.0*. -To use this connector, satisfy all <>. 
- -[discrete#es-connectors-salesforce-compatability] -===== Compatibility - -This connector is compatible with the following: - -* Salesforce -* Salesforce Sandbox - -[discrete#es-connectors-salesforce-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-salesforce-usage] -===== Usage - -To use this connector as a **self-managed connector**, use the *Connector* workflow in the Kibana UI. - -For additional operations, see <>. - -[NOTE] -==== -You need to create an Salesforce connected app with OAuth2.0 enabled to authenticate with Salesforce. -==== - -[discrete#es-connectors-salesforce-connected-app] -====== Create a Salesforce connected app - -The Salesforce connector authenticates with Salesforce through a *connected app*. -Follow the official Salesforce documentation for https://help.salesforce.com/s/articleView?id=sf.connected_app_client_credentials_setup.htm[Configuring a Connected App for the OAuth 2.0 Client Credentials Flow^]. - -When creating the connected app, in the section titled *API (Enable OAuth Settings)* ensure the following settings are _enabled_: - -* *Enable OAuth Settings* -* *Enable for Device Flow* -** *Callback URL* should be the Salesforce dummy callback URL, `https://test.salesforce.com/services/oauth2/success` -* *Require Secret for Web Server Flow* -* *Require Secret for Refresh Token Flow* -* *Enable Client Credentials Flow* - -All other options should be disabled. -Finally, in the section *Selected OAuth Scopes*, include the following OAuth scopes: - -* *Manage user data via APIs (api)* -* *Perform requests at any time (refresh_token, offline_access)* - -[discrete#es-connectors-salesforce-admin-prerequisites] -===== Salesforce admin requirements - -By default, the Salesforce connector requires global administrator permissions to access Salesforce data. -Expand the section below to learn how to create a custom Salesforce user with minimal permissions. - -.*Create a custom Salesforce user with minimal permissions* -[%collapsible] -========================== -By creating a custom profile with sufficient permissions from the Setup menu, you can remove the system administrator role requirement for fetching data from Salesforce. - -To create a new profile: - -1. From the Salesforce Setup menu, go to *Administration => Users => Profiles*. -2. Create a new profile. -3. Choose `Read Only` or `Standard User` from the *Existing Profile* dropdown. Name the profile and save it. -+ -[TIP] -==== -By default, `Read Only` or `Standard User` users have read permission to access all standard objects. -==== -+ -4. Edit the newly created profile. Under *Object Permissions*, assign at least `Read` access to the standard objects and custom objects you want to ingest into Elasticsearch. -5. Make sure the newly created profile has at least `Read` access for the following standard objects: - -* Account -* Campaign -* Case -* Contact -* EmailMessage -* Lead -* Opportunity -* User -+ -[TIP] -==== -If using <> you'll need to assign `Read` access for that specific object in the profile. -==== -+ -6. Go to *Users => Profiles* and assign the newly created profile to the user. -7. Go to *Connected apps*, select your app and then select *Edit policies*. -Assign the client credentials flow to the user with the custom profile in Salesforce. -+ -Now, the connector can be configured for this user profile to fetch all object records, without needing the system administration role. 
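-
-If you want to confirm that the connected app and the custom integration user are wired up correctly, a quick smoke test is to request a client credentials token and read a single record.
-The snippet below is a minimal sketch and is not run by the connector: the `foo` domain, the `v59.0` API version, the `SF_CLIENT_ID` and `SF_CLIENT_SECRET` environment variables, and the use of `jq` are illustrative assumptions, so substitute your own values.
-
-[source,shell]
----
-# Request an access token using the connected app's client credentials flow
-ACCESS_TOKEN=$(curl -s "https://foo.my.salesforce.com/services/oauth2/token" \
-  -d "grant_type=client_credentials" \
-  -d "client_id=$SF_CLIENT_ID" \
-  -d "client_secret=$SF_CLIENT_SECRET" | jq -r '.access_token')
-
-# Confirm the integration user can read one of the standard objects the connector syncs
-curl -s "https://foo.my.salesforce.com/services/data/v59.0/query/?q=SELECT+Id+FROM+Account+LIMIT+1" \
-  -H "Authorization: Bearer $ACCESS_TOKEN"
----
-
-An authentication error on the first request points at the connected app settings, while an error on the query usually means the custom profile is still missing `Read` access for that object.
-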
-========================== - -[discrete#es-connectors-salesforce-configuration] -===== Configuration - -The following settings are required to set up this connector: - -Domain (required):: -The domain for your Salesforce account. -This is the subdomain that appears in your Salesforce URL. -For example, if your Salesforce URL is `foo.my.salesforce.com`, then your domain would be `foo`. -If you are using Salesforce Sandbox, your URL will contain an extra subdomain and will look similar to `foo.sandbox.my.salesforce.com`. -In this case, your domain would be `foo.sandbox`. - -Client ID (required):: -The Client ID generated by your connected app. -The Salesforce documentation will sometimes also call this a *Consumer Key* - -Client Secret (required):: -The Client Secret generated by your connected app. -The Salesforce documentation will sometimes also call this a *Consumer Secret*. - -Enable document level security:: -Toggle to enable document level security (DLS). -Optional, disabled by default. -Refer to the <> for more information, including how to set various Salesforce permission types. -+ -When enabled: - -* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -* Access control syncs will fetch users' access control lists and store them in a separate index. - -[discrete#es-connectors-salesforce-configuration-credentials] -====== Finding the Client ID and Client Secret - -The Client ID and Client Secret are not automatically shown to you after you create a connected app. -You can find them by taking the following steps: - -* Navigate to *Setup* -* Go to *Platform Tools > Apps > App Manager* -* Click on the triangle next to your app and select *View* -* After the page loads, click on *Manage Consumer Details* - -Your Client ID and Client Secret should now be visible at the top of the page. - -[discrete#es-connectors-salesforce-dls] -===== Document level security (DLS) - -<> enables you to restrict access to documents based on a user'­s permissions. -This feature is available by default for the Salesforce connector and supports both *standard and custom objects*. - -Salesforce allows users to set permissions in the following ways: - -* *Profiles* -* *Permission sets* -* *Permission set Groups* - -For guidance, refer to these https://howtovideos.hubs.vidyard.com/watch/B1bQnMFg2VyZq7V6zXQjPg#:~:text=This%20is%20a%20must%20watch,records%20in%20your%20Salesforce%20organization[video tutorials] about setting Salesforce permissions. - -To ingest any standard or custom objects, users must ensure that at least `Read` permission is granted to that object. -This can be granted using any of the following methods for setting permissions. - -[discrete#es-connectors-salesforce-dls-profiles] -====== Set Permissions using Profiles - -Refer to the https://help.salesforce.com/s/articleView?id=sf.admin_userprofiles.htm&type=5[Salesforce documentation] for setting permissions via Profiles. - -[discrete#es-connectors-salesforce-dls-permission-sets] -====== Set Permissions using Permissions Set - -Refer to the https://help.salesforce.com/s/articleView?id=sf.perm_sets_overview.htm&language=en_US&type=5[Salesforce documentation] for setting permissions via Permissions Sets. - -[discrete#es-connectors-salesforce-dls-permission-set-groups] -====== Set Permissions using Permissions Set group - -Refer to the https://help.salesforce.com/s/articleView?id=sf.perm_set_groups.htm&type=5[Salesforce documentation] for setting permissions via Permissions Set Groups. 
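-
-Regardless of which of these mechanisms you use, you can confirm that `Read` access on a given object is actually granted by querying `ObjectPermissions` through the REST API.
-The query below is a minimal sketch, assuming an OAuth access token for the connected app is available as `ACCESS_TOKEN` and that your domain is `foo`; the object name and API version are illustrative.
-
-[source,shell]
----
-# List the permission sets (including profile-owned ones) that grant Read access on Account
-# ACCESS_TOKEN, the "foo" domain and the object name are illustrative placeholders
-curl -s -G "https://foo.my.salesforce.com/services/data/v59.0/query/" \
-  -H "Authorization: Bearer $ACCESS_TOKEN" \
-  --data-urlencode "q=SELECT Parent.Name, Parent.IsOwnedByProfile FROM ObjectPermissions WHERE SobjectType = 'Account' AND PermissionsRead = true"
----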
- -[discrete#es-connectors-salesforce-dls-assign-permissions] -====== Assign Profiles, Permission Set and Permission Set Groups to the User - -Once the permissions are set, assign the Profiles, Permission Set or Permission Set Groups to the user. -Follow these steps in Salesforce: - -1. Navigate to `Administration` under the `Users` section. -2. Select `Users` and choose the user to set the permissions to. -3. Set the `Profile`, `Permission Set` or `Permission Set Groups` created in the earlier steps. - -[discrete#es-connectors-salesforce-sync-rules] -===== Sync rules - -_Basic_ sync rules are identical for all connectors and are available by default. -For more information read <>. - -[discrete#es-connectors-salesforce-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -The following section describes *advanced sync rules* for this connector. -Advanced sync rules enable filtering of data in Salesforce _before_ indexing into Elasticsearch. - -They take the following parameters: - -. `query` : Salesforce query to filter the documents. -. `language` : Salesforce query language. -Allowed values are *SOQL* and *SOSL*. - -[discrete#es-connectors-salesforce-sync-rules-advanced-fetch-query-language] -*Fetch documents based on the query and language specified* - -**Example**: Fetch documents using SOQL query - -[source,js] ----- -[ - { - "query": "SELECT Id, Name FROM Account", - "language": "SOQL" - } -] ----- -// NOTCONSOLE - -**Example**: Fetch documents using SOSL query. - -[source,js] ----- -[ - { - "query": "FIND {Salesforce} IN ALL FIELDS", - "language": "SOSL" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-salesforce-sync-rules-advanced-fetch-objects] -*Fetch standard and custom objects using SOQL and SOSL queries* - -**Example**: Fetch documents for standard objects via SOQL and SOSL query. - -[source,js] ----- -[ - { - "query": "SELECT Account_Id, Address, Contact_Number FROM Account", - "language": "SOQL" - }, - { - "query": "FIND {Alex Wilber} IN ALL FIELDS RETURNING Contact(LastModifiedDate, Name, Address)", - "language": "SOSL" - } -] ----- -// NOTCONSOLE - -**Example**: Fetch documents for custom objects via SOQL and SOSL query. - -[source,js] ----- -[ - { - "query": "SELECT Connector_Name, Version FROM Connector__c", - "language": "SOQL" - }, - { - "query": "FIND {Salesforce} IN ALL FIELDS RETURNING Connectors__c(Id, Connector_Name, Connector_Version)", - "language": "SOSL" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-salesforce-sync-rules-advanced-fetch-standard-custom-fields] -*Fetch documents with standard and custom fields* - -**Example**: Fetch documents with all standard and custom fields for Account object. - -[source,js] ----- -[ - { - "query": "SELECT FIELDS(ALL) FROM Account", - "language": "SOQL" - } -] ----- -// NOTCONSOLE - -**Example**: Fetch documents with all custom fields for Connector object. - -[source,js] ----- -[ - { - "query": "SELECT FIELDS(CUSTOM) FROM Connector__c", - "language": "SOQL" - } -] ----- -// NOTCONSOLE - -**Example**: Fetch documents with all standard fields for Account object. 
- -[source,js] ----- -[ - { - "query": "SELECT FIELDS(STANDARD) FROM Account", - "language": "SOQL" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-salesforce-documents-syncs] -===== Documents and syncs - -The connector syncs the following Salesforce objects: - -* *Accounts* -* *Campaigns* -* *Cases* -* *Contacts* -* *Content Documents* (files uploaded to Salesforce) -* *Leads* -* *Opportunities* - -The connector will not ingest any objects that it does not have permissions to query. - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permissions are not synced by default. You must enable <>. Otherwise, **all documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#es-connectors-salesforce-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-salesforce-content-extraction] -===== Content Extraction - -The connector will retrieve Content Documents from your Salesforce source if they meet the following criteria: - -* Are attached to one or more objects that are synced -* Are of a file type that can be extracted - -This means that the connector will not ingest any Content Documents you have that are _not_ attached to a supported Salesforce object. -See <> for a list of supported object types. - -If a single Content Document is attached to multiple supported objects, only one Elastic document will be created for it. -This document will retain links to every object that it was connected to in the `related_ids` field. - -See <> for more specifics on content extraction. - -[discrete#es-connectors-salesforce-known-issues] -===== Known issues - -* *DLS feature is "type-level" not "document-level"* -+ -Salesforce DLS, added in 8.13.0, does not accomodate specific access controls to specific Salesforce Objects. -Instead, if a given user/group can have access to _any_ Objects of a given type (`Case`, `Lead`, `Opportunity`, etc), that user/group will appear in the `\_allow_access_control` list for _all_ of the Objects of that type. -See https://github.com/elastic/connectors/issues/3028 for more details. -+ - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-salesforce-security] -===== Security - -See <>. - -[discrete#es-connectors-salesforce-source] -===== Framework and source - -This connector is built with the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/salesforce.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-salesforce-connector-client-reference] -==== *Self-managed connector reference* - -.View *self-managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-salesforce-client-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. -This self-managed connector is compatible with Elastic versions *8.10.0+*. -To use this connector, satisfy all <>. 
- -[discrete#es-connectors-salesforce-client-compatability] -===== Compatibility - -This connector is compatible with the following: - -* Salesforce -* Salesforce Sandbox - -[discrete#es-connectors-salesforce-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-salesforce-client-usage] -===== Usage - -To use this connector as a **self-managed connector**, use the *Connector* workflow in the Kibana UI. - -For additional operations, see <>. - -[NOTE] -==== -You need to create an Salesforce connected app with OAuth2.0 enabled to authenticate with Salesforce. -==== - -[discrete#es-connectors-salesforce-client-connected-app] -====== Create a Salesforce connected app - -The Salesforce connector authenticates with Salesforce through a *connected app*. -Follow the official Salesforce documentation for https://help.salesforce.com/s/articleView?id=sf.connected_app_client_credentials_setup.htm[Configuring a Connected App for the OAuth 2.0 Client Credentials Flow^]. - -When creating the connected app, in the section titled *API (Enable OAuth Settings)* ensure the following settings are _enabled_: - -* *Enable OAuth Settings* -* *Enable for Device Flow* -** *Callback URL* should be the Salesforce dummy callback URL, `https://test.salesforce.com/services/oauth2/success` -* *Require Secret for Web Server Flow* -* *Require Secret for Refresh Token Flow* -* *Enable Client Credentials Flow* - -All other options should be disabled. -Finally, in the section *Selected OAuth Scopes*, include the following OAuth scopes: - -* *Manage user data via APIs (api)* -* *Perform requests at any time (refresh_token, offline_access)* - -[discrete#es-connectors-client-salesforce-admin-prerequisites] -===== Salesforce admin requirements - -By default, the Salesforce connector requires global administrator permissions to access Salesforce data. -Expand the section below to learn how to create a custom Salesforce user with minimal permissions. - -.*Create a custom Salesforce user with minimal permissions* -[%collapsible] -========================== -By creating a custom profile with sufficient permissions from the Setup menu, you can remove the system administrator role requirement for fetching data from Salesforce. - -To create a new profile: - -1. From the Salesforce Setup menu, go to *Administration => Users => Profiles*. -2. Create a new profile. -3. Choose `Read Only` or `Standard User` from the *Existing Profile* dropdown. Name the profile and save it. -+ -[TIP] -==== -By default, `Read Only` or `Standard User` users have read permission to access all standard objects. -==== -+ -4. Edit the newly created profile. Under *Object Permissions*, assign at least `Read` access to the standard objects and custom objects you want to ingest into Elasticsearch. -5. Make sure the newly created profile has at least `Read` access for the following standard objects: - -* Account -* Campaign -* Case -* Contact -* EmailMessage -* Lead -* Opportunity -* User -+ -[TIP] -==== -If using <> you'll need to assign `Read` access for that specific object in the profile. -==== -+ -6. Go to *Users => Profiles* and assign the newly created profile to the user. -7. Go to *Connected apps*, select your app and then select *Edit policies*. -Assign the client credentials flow to the user with the custom profile in Salesforce. -+ -Now, the connector can be configured for this user profile to fetch all object records, without needing the system administration role. 
-========================== - -[discrete#es-connectors-salesforce-client-docker] -===== Deployment using Docker - -Self-managed connectors are run on your own infrastructure. - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-salesforce-client-configuration] -===== Configuration - -The following settings are required to set up this connector: - -`domain`(required):: -The domain for your Salesforce account. -This is the subdomain that appears in your Salesforce URL. -For example, if your Salesforce URL is `foo.my.salesforce.com`, then your domain would be `foo`. -If you are using Salesforce Sandbox, your URL will contain an extra subdomain and will look similar to `foo.sandbox.my.salesforce.com`. -In this case, your domain would be `foo.sandbox`. - -`client_id`(required):: -The Client ID generated by your connected app. -The Salesforce documentation will sometimes also call this a *Consumer Key* - -`client_secret`(required):: -The Client Secret generated by your connected app. -The Salesforce documentation will sometimes also call this a *Consumer Secret*. - -`use_document_level_security`:: -Toggle to enable document level security (DLS). -Optional, disabled by default. -Refer to the <> for more information, including how to set various Salesforce permission types. -+ -When enabled: - -* Full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -* Access control syncs will fetch users' access control lists and store them in a separate index. - -[discrete#es-connectors-salesforce-client-configuration-credentials] -====== Finding the Client ID and Client Secret - -The Client ID and Client Secret are not automatically shown to you after you create a connected app. -You can find them by taking the following steps: - -* Navigate to *Setup* -* Go to *Platform Tools > Apps > App Manager* -* Click on the triangle next to your app and select *View* -* After the page loads, click on *Manage Consumer Details* - -Your Client ID and Client Secret should now be visible at the top of the page. - -[discrete#es-connectors-salesforce-client-dls] -===== Document level security (DLS) - -<> enables you to restrict access to documents based on a user'­s permissions. -This feature is available by default for the Salesforce connector and supports both *standard and custom objects*. - -Salesforce allows users to set permissions in the following ways: - -* *Profiles* -* *Permission sets* -* *Permission set Groups* - -For guidance, refer to these https://howtovideos.hubs.vidyard.com/watch/B1bQnMFg2VyZq7V6zXQjPg#:~:text=This%20is%20a%20must%20watch,records%20in%20your%20Salesforce%20organization[video tutorials] about setting Salesforce permissions. - -To ingest any standard or custom objects, users must ensure that at least `Read` permission is granted to that object. -This can be granted using any of the following methods for setting permissions. - -[discrete#es-connectors-salesforce-client-dls-profiles] -====== Set Permissions using Profiles - -Refer to the https://help.salesforce.com/s/articleView?id=sf.admin_userprofiles.htm&type=5[Salesforce documentation] for setting permissions via Profiles. - -[discrete#es-connectors-salesforce-client-dls-permission-sets] -====== Set Permissions using Permissions Set - -Refer to the https://help.salesforce.com/s/articleView?id=sf.perm_sets_overview.htm&language=en_US&type=5[Salesforce documentation] for setting permissions via Permissions Sets. 
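-
-To see which permission sets are currently assigned to the connector's integration user, you can also query `PermissionSetAssignment` through the REST API.
-This is a minimal sketch rather than part of the connector configuration: it assumes an OAuth access token for the connected app (obtained via the client credentials flow and exported as `ACCESS_TOKEN`), a `foo` domain, and an illustrative username.
-
-[source,shell]
----
-# List the permission sets assigned to the integration user
-# ACCESS_TOKEN, the "foo" domain and the username are illustrative placeholders
-curl -s -G "https://foo.my.salesforce.com/services/data/v59.0/query/" \
-  -H "Authorization: Bearer $ACCESS_TOKEN" \
-  --data-urlencode "q=SELECT PermissionSet.Name FROM PermissionSetAssignment WHERE Assignee.Username = 'connector.user@example.com'"
----
-
-Any object you want to ingest must be readable through at least one of these assignments or through the user's profile.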
- -[discrete#es-connectors-salesforce-client-dls-permission-set-groups] -====== Set Permissions using Permissions Set group - -Refer to the https://help.salesforce.com/s/articleView?id=sf.perm_set_groups.htm&type=5[Salesforce documentation] for setting permissions via Permissions Set Groups. - -[discrete#es-connectors-salesforce-client-dls-assign-permissions] -====== Assign Profiles, Permission Set and Permission Set Groups to the User - -Once the permissions are set, assign the Profiles, Permission Set or Permission Set Groups to the user. -Follow these steps in Salesforce: - -1. Navigate to `Administration` under the `Users` section. -2. Select `Users` and choose the user to set the permissions to. -3. Set the `Profile`, `Permission Set` or `Permission Set Groups` created in the earlier steps. - -[discrete#es-connectors-salesforce-client-sync-rules] -===== Sync rules - -_Basic_ sync rules are identical for all connectors and are available by default. - -For more information read <>. - -[discrete#es-connectors-salesforce-client-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -The following section describes *advanced sync rules* for this connector. -Advanced sync rules enable filtering of data in Salesforce _before_ indexing into Elasticsearch. - -They take the following parameters: - -. `query` : Salesforce query to filter the documents. -. `language` : Salesforce query language. -Allowed values are *SOQL* and *SOSL*. - -[discrete#es-connectors-salesforce-client-sync-rules-advanced-fetch-query-language] -*Fetch documents based on the query and language specified* - -**Example**: Fetch documents using SOQL query - -[source,js] ----- -[ - { - "query": "SELECT Id, Name FROM Account", - "language": "SOQL" - } -] ----- -// NOTCONSOLE - -**Example**: Fetch documents using SOSL query. - -[source,js] ----- -[ - { - "query": "FIND {Salesforce} IN ALL FIELDS", - "language": "SOSL" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-salesforce-client-sync-rules-advanced-fetch-objects] -*Fetch standard and custom objects using SOQL and SOSL queries* - -**Example**: Fetch documents for standard objects via SOQL and SOSL query. - -[source,js] ----- -[ - { - "query": "SELECT Account_Id, Address, Contact_Number FROM Account", - "language": "SOQL" - }, - { - "query": "FIND {Alex Wilber} IN ALL FIELDS RETURNING Contact(LastModifiedDate, Name, Address)", - "language": "SOSL" - } -] ----- -// NOTCONSOLE - -**Example**: Fetch documents for custom objects via SOQL and SOSL query. - -[source,js] ----- -[ - { - "query": "SELECT Connector_Name, Version FROM Connector__c", - "language": "SOQL" - }, - { - "query": "FIND {Salesforce} IN ALL FIELDS RETURNING Connectors__c(Id, Connector_Name, Connector_Version)", - "language": "SOSL" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-salesforce-client-sync-rules-advanced-fetch-standard-custom-fields] -*Fetch documents with standard and custom fields* - -**Example**: Fetch documents with all standard and custom fields for Account object. - -[source,js] ----- -[ - { - "query": "SELECT FIELDS(ALL) FROM Account", - "language": "SOQL" - } -] ----- -// NOTCONSOLE - -**Example**: Fetch documents with all custom fields for Connector object. - -[source,js] ----- -[ - { - "query": "SELECT FIELDS(CUSTOM) FROM Connector__c", - "language": "SOQL" - } -] ----- -// NOTCONSOLE - -**Example**: Fetch documents with all standard fields for Account object. 
- -[source,js] ----- -[ - { - "query": "SELECT FIELDS(STANDARD) FROM Account", - "language": "SOQL" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-salesforce-client-documents-syncs] -===== Documents and syncs - -The connector syncs the following Salesforce objects: - -* *Accounts* -* *Campaigns* -* *Cases* -* *Contacts* -* *Content Documents* (files uploaded to Salesforce) -* *Leads* -* *Opportunities* - - -The connector will not ingest any objects that it does not have permissions to query. - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted by default. Use the <> to handle larger binary files. -* Permissions are not synced by default. You must enable <>. Otherwise, **all documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#es-connectors-salesforce-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-salesforce-client-content-extraction] -===== Content Extraction - -The connector will retrieve Content Documents from your Salesforce source if they meet the following criteria: - -* Are attached to one or more objects that are synced -* Are of a file type that can be extracted - -This means that the connector will not ingest any Content Documents you have that are _not_ attached to a supported Salesforce object. -See <> for a list of supported object types. - -If a single Content Document is attached to multiple supported objects, only one Elastic document will be created for it. -This document will retain links to every object that it was connected to in the `related_ids` field. - -See <> for more specifics on content extraction. - -[discrete#es-connectors-salesforce-client-known-issues] -===== Known issues - -* *DLS feature is "type-level" not "document-level"* -+ -Salesforce DLS, added in 8.13.0, does not accomodate specific access controls to specific Salesforce Objects. -Instead, if a given user/group can have access to _any_ Objects of a given type (`Case`, `Lead`, `Opportunity`, etc), that user/group will appear in the `\_allow_access_control` list for _all_ of the Objects of that type. -See https://github.com/elastic/connectors/issues/3028 for more details. -+ - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-salesforce-client-security] -===== Security - -See <>. - -[discrete#es-connectors-salesforce-client-source] -===== Framework and source - -This connector is built with the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/salesforce.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-scalability.asciidoc b/docs/reference/connector/docs/connectors-scalability.asciidoc deleted file mode 100644 index 6ebcb58e11487..0000000000000 --- a/docs/reference/connector/docs/connectors-scalability.asciidoc +++ /dev/null @@ -1,49 +0,0 @@ -[#es-connectors-scalability] -=== Connector Scalability -++++ -Scalability -++++ - -[discrete#es-connectors-scalability-redundancy] -==== Redundancy - -Users can create a backup (secondary) server with an identical connector service setup (settings, code, etc..). 
-If the primary server running the connector service fails, users can start up the connector service on the secondary -server and restart the sync jobs. -Because connector definitions and job status information are all stored in Elasticsearch, there is no risk of data loss -or corruption when switching servers. - -However, note that any in-progress syncs will need to be restarted from scratch, and cannot be resumed where they were -interrupted from. - -[discrete#es-connectors-scalability-failover] -==== Failover - -There is currently no automatic failover or transfer of workload in case of failure. -If the server running the connector service fails, all outstanding connector sync jobs will go into a suspended state. -When the connector service returns (or if a replacement connector service is launched), it will identify any jobs that -need to be cleaned up and automatically restart (from scratch) the suspended jobs. - -[discrete#es-connectors-scalability-balancing] -==== Workload balancing - -There is currently no division/balancing of workload for a single sync job across multiple connector service deployments. -Once a sync job is claimed by a connector service, it will run the job to completion - unless the connector service -instance fails. -In that case, another connector service instance will pick up the suspended job and restart it (from scratch). - -In 8.8.0+, the Connector Service provides concurrency control when there are multiple connector services connected to -the same Elasticsearch cluster, with the following expectations: - -* Multiple sync jobs can be scheduled for a given search index but only 1 sync job can be executed for a search index at any single time. -* Each sync job can only be claimed by 1 connector service. -* Only 1 connector service can perform management tasks at a time, for example: populating service types and configurations, validating filters, etc. - -[discrete#es-connectors-scalability-horizontal] -==== Horizontal Scalability - -Horizontal scaling can work if there are multiple connector services running and are configured to allow concurrent -syncs via their `service.max_concurrent_syncs` settings. - -Hypothetically, multiple Connector Services would naturally load balance to some extent even though we do not currently -have explicit load balancing functionality. diff --git a/docs/reference/connector/docs/connectors-security.asciidoc b/docs/reference/connector/docs/connectors-security.asciidoc deleted file mode 100644 index 4cbeacf3f28f0..0000000000000 --- a/docs/reference/connector/docs/connectors-security.asciidoc +++ /dev/null @@ -1,55 +0,0 @@ -[#es-connectors-security] -=== Connectors security -++++ -Security -++++ - -This document describes security considerations for <> and <>. - -Elastic Cloud deployments have strong security defaults. -For example, data is encrypted by default, whether at rest or in transit. - -Self-managed deployments require more upfront work to ensure strong security. -Refer to {ref}/secure-cluster.html[Secure the Elastic Stack^] in the Elasticsearch documentation for more information. - -[discrete#es-native-connectors-security-connections] -==== Access to credentials - -Credentials for the data source — such as API keys or username/password pair— are stored in your deployment's `.elastic-connectors` Elasticsearch index. -Therefore, the credentials are visible to all Elastic users with the `read` {ref}/security-privileges.html[indices privilege^] for that index. 
-By default, the following Elastic users have this privilege: the `elastic` superuser and the `kibana_system` user. -Enterprise Search service account tokens can also read the `.elastic-connectors` index. - -[discrete#es-native-connectors-security-api-key] -==== Access to internally stored API keys - -API keys for Elastic managed connectors are stored in the internal system index `.connector-secrets`. -Access to this index is restricted to authorized API calls only. -The cluster privilege `write_connector_secrets` is required to store or update secrets through the API. -Only the Enterprise Search instance has permission to read from this index. - -[discrete#es-native-connectors-security-dls] -===== Document-level security - -Document-level security is available for a subset of connectors. -DLS is available by default for the following connectors: - -include::_connectors-list-dls.asciidoc[] - -Learn more about this feature in <>, including availability and prerequisites. - -[discrete#es-native-connectors-security-deployment] -==== Access to documents - -Data synced from your data source are stored as documents in the Elasticsearch index you created. -This data is visible to all Elastic users with the `read` {ref}/security-privileges.html[indices privilege^] for that index. -Be careful to ensure that access to this index is _at least_ as restrictive as access to the original data source. - -[discrete#es-native-connectors-security-encryption] -==== Encryption - -Elastic Cloud automatically encrypts data at rest. -Data in transit is automatically encrypted using `https`. - -Self-managed deployments must implement encryption at rest. -See {ref}/configuring-stack-security.html[Configure security for the Elastic Stack^] in the Elasticsearch documentation for more information. diff --git a/docs/reference/connector/docs/connectors-self-managed.asciidoc b/docs/reference/connector/docs/connectors-self-managed.asciidoc deleted file mode 100644 index e119953019442..0000000000000 --- a/docs/reference/connector/docs/connectors-self-managed.asciidoc +++ /dev/null @@ -1,123 +0,0 @@ -[#es-build-connector] -== Self-managed connectors - -.Naming history -**** -Self-managed connectors were initially known as "connector clients". You might find this term in older documentation. -**** - -Self-managed <> are run on your own infrastructure. -This means they run outside of your Elastic deployment. - -You can run the <> from source or from a Docker container. - -We also have a quickstart option using *Docker Compose*, to spin up all the required services at once: Elasticsearch, Kibana, and the connectors service. -Refer to <> for more information. - -The following connectors are available as self-managed connectors: - -include::_connectors-list-clients.asciidoc[] - -[discrete#es-build-connector-prerequisites] -=== Availability and Elastic prerequisites - -[NOTE] -==== -Self-managed connectors currently don't support Windows. -Use this https://www.elastic.co/support/matrix#matrix_os[compatibility matrix^] to check which operating systems are supported by self-managed connectors. -Find this information under *self-managed connectors* on that page. -==== - -.*Expand* for Elastic prerequisites information -[%collapsible] -==== -Your Elastic deployment must include the following Elastic services: - -* *Elasticsearch* -* *Kibana* - -(A new Elastic Cloud deployment includes these services by default.) - -To run self-managed connectors, your self-deployed connector service version must match your Elasticsearch version. 
-For example, if you're running Elasticsearch 8.10.1, your connector service should be version 8.10.1.x. -Elastic does not support deployments running mismatched versions (except during upgrades). - -[NOTE] -====== -As of 8.10.0 _new_ self-managed connectors no longer require the Enterprise Search service. -However, if you are upgrading connectors from versions earlier than 8.9, you'll need to run Enterprise Search once to migrate your connectors to the new format. -In future releases, you may still need to run Enterprise Search for the purpose of migrations or upgrades. -====== - -You must have access to Kibana and have `write` {ref}/security-privileges.html[indices privileges^] for the `.elastic-connectors` index. - -To use connector clients in a self-managed environment, you must deploy the <>. - -*Support and licensing requirements* - -Depending on how you use self-managed connectors, support and licensing requirements will vary. - -Refer to the following subscriptions pages for details. -Find your connector of interest in the *Elastic Search* section under *Client Integrations*: - -* https://www.elastic.co/subscriptions/[Elastic self-managed subscriptions page] -* https://www.elastic.co/subscriptions/cloud[Elastic Cloud subscriptions page] - -Note the following information regarding support for self-managed connectors: - -* A converted but _unmodified_ managed connector is supported by Elastic. -* A converted but _customized_ managed connector is _not_ supported by Elastic. - -==== - -[discrete#es-build-connector-data-source-prerequisites] -.Data source prerequisites -**** -The first decision you need to make before deploying a connector is which third party service (data source) you want to sync to Elasticsearch. -Note that each data source will have specific prerequisites you'll need to meet to authorize the connector to access its data. -For example, certain data sources may require you to create an OAuth application, or create a service account. - -You'll need to check the individual connector documentation for these details. -**** - -[discrete#es-connectors-deploy-connector-service] -=== Deploy the connector service - -The connector service is a Python application that you must run on your own infrastructure when using self-managed connectors. -The source code is hosted in the https://github.com/elastic/connectors[elastic/connectors^] repository. - -You can run the connector service from source or use Docker: - -* <>. Use this option if you're comfortable working with Python and want to iterate quickly locally. -* <>. Use this option if you want to deploy the connectors to a server, or use a container orchestration platform. -** Refer to our <> for a quick way to spin up all the required services at once. - -[discrete#es-build-connector-example] -=== Tutorials - -* Follow our <> to learn how run the self-managed connector service and a set up a self-managed connector, *using the UI*. -* Follow our <> to learn how to set up a self-managed connector *using the* {ref}/connector-apis.html[*connector APIs*]. - -These examples use the PostgreSQL connector but the basic process is the same for all self-managed connectors. - -[discrete#es-build-connector-testing] -=== Connector testing - -The connector framework enables you to run end-to-end (E2E) tests on your self-managed connectors, against a real data source. - -To avoid tampering with a real Elasticsearch instance, E2E tests run an isolated Elasticsearch instance in Docker. 
-Configuration values are set in your `docker-compose.yml` file. -Docker Compose manages the setup of the development environment, including both the mock Elastic instance and mock data source. - -E2E tests use *default* configuration values for the connector. -Find instructions about testing in each connector's documentation. - -[discrete#es-build-connector-framework] -=== Connector framework - -The Elastic connector framework enables you to: - -* Customize existing self-managed connectors. -* Build your own self-managed connectors. - -Refer to <> for more information. diff --git a/docs/reference/connector/docs/connectors-servicenow.asciidoc b/docs/reference/connector/docs/connectors-servicenow.asciidoc deleted file mode 100644 index 3dc98ed9a44c9..0000000000000 --- a/docs/reference/connector/docs/connectors-servicenow.asciidoc +++ /dev/null @@ -1,494 +0,0 @@ -[#es-connectors-servicenow] -=== Elastic ServiceNow connector reference -++++ -ServiceNow -++++ -// Attributes used in this file -:service-name: ServiceNow -:service-name-stub: servicenow - -The _Elastic ServiceNow connector_ is a <> for https://www.servicenow.com[ServiceNow^]. - -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. -******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-servicenow-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-servicenow-availability-prerequisites] -===== Availability and prerequisites - -The ServiceNow connector is available natively in Elastic Cloud since 8.10.0. - -To use this connector natively in Elastic Cloud, satisfy all <>. - -[discrete#es-connectors-servicenow-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-servicenow-usage] -===== Usage - -To use this connector natively in Elastic Cloud, see <>. - -For additional operations, see <> - -[discrete#es-connectors-servicenow-compatibility] -===== Compatibility - -The ServiceNow connector is compatible with the following versions of ServiceNow: - -* ServiceNow "Tokyo" -* ServiceNow "San Diego" -* ServiceNow "Rome" -* ServiceNow "Utah" -* ServiceNow "Vancouver" -* ServiceNow "Washington" -* ServiceNow "Xanadu" - -[discrete#es-connectors-servicenow-configuration] -===== Configuration - -The following configuration fields are required to set up the connector: - -ServiceNow URL:: -The host URL of the ServiceNow instance. - -Username:: -The username of the account used for ServiceNow. - -Password:: -The password of the account used for ServiceNow. - -Comma-separated list of services:: -Comma-separated list of services to fetch data from ServiceNow. 
If the value is `*`, the connector will fetch data from the list of basic services provided by ServiceNow: -- link:https://docs.servicenow.com/bundle/utah-platform-administration/page/administer/roles/concept/user.html[User] -- link:https://docs.servicenow.com/bundle/tokyo-it-service-management/page/product/incident-management/concept/c_IncidentManagement.html[Incident] -- link:https://docs.servicenow.com/bundle/tokyo-servicenow-platform/page/use/service-catalog-requests/task/t_AddNewRequestItems.html[Requested Item] -- link:https://docs.servicenow.com/bundle/tokyo-customer-service-management/page/product/customer-service-management/task/t_SearchTheKnowledgeBase.html[Knowledge] -- link:https://docs.servicenow.com/bundle/tokyo-it-service-management/page/product/change-management/task/t_CreateAChange.html[Change request] -+ -[NOTE] -==== -If you have configured a custom service, the `*` value will not fetch data from the basic services above by default. In this case you'll need to mention these service names explicitly. -==== -Default value is `*`. Examples: -+ - - `User, Incident, Requested Item, Knowledge, Change request` - - `*` - -Enable document level security:: -Restrict access to documents based on a user's permissions. -Refer to <> for more details. - -[discrete#es-connectors-servicenow-documents-syncs] -===== Documents and syncs - -All services and records the user has access to will be indexed according to the configurations provided. -The connector syncs the following ServiceNow object types: - -* Records -* Attachments - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permissions are not synced by default. Refer to <> for more details. -==== - -[discrete#es-connectors-servicenow-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-servicenow-dls] -===== Document level security - -<> ensures identities and permissions set in ServiceNow are maintained in Elasticsearch. -This enables you to restrict and personalize read-access users and groups have to documents in this index. -Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. - -The ServiceNow connector supports roles for access control lists (ACLs) to enable document level security in {es}. -For default services, connectors use the following roles to find users who have access to documents. - -|=== -| Service | Roles - -| User | `admin` - -| Incident | `admin`, `sn_incident_read`, `ml_report_user`, `ml_admin`, `itil` - -| Requested Item | `admin`, `sn_request_read`, `asset`, `atf_test_designer`, `atf_test_admin` - -| Knowledge | `admin`, `knowledge`, `knowledge_manager`, `knowledge_admin` - -| Change request | `admin`, `sn_change_read`, `itil` -|=== - -For services other than these defaults, the connector iterates over access controls with `read` operations and finds the respective roles for those services. - -[NOTE] -==== -The ServiceNow connector does not support scripted and conditional permissions. -==== - -[discrete#es-connectors-servicenow-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-servicenow-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -Advanced sync rules are defined through a source-specific DSL JSON snippet. 
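-
-Each rule pairs a `service` with a ServiceNow encoded query.
-If you want to sanity-check an encoded query before adding it to a sync rule, you can run it directly against the ServiceNow Table API.
-The command below is a minimal sketch that the connector does not run for you: the `example` instance name, the credentials, the field list, and the `incident` table (which backs the Incident service) are illustrative placeholders.
-
-[source,shell]
----
-# Dry-run an encoded query against the Table API before using it in a sync rule
-# The instance name, credentials, table and field list are illustrative placeholders
-curl -s -G -u "$SN_USERNAME:$SN_PASSWORD" \
-  "https://example.service-now.com/api/now/table/incident" \
-  --data-urlencode "sysparm_query=numberSTARTSWITHINC001" \
-  --data-urlencode "sysparm_fields=number,short_description" \
-  --data-urlencode "sysparm_limit=5" \
-  -H "Accept: application/json"
----
-
-If this returns the records you expect, the same string can be used as the `query` value for the `Incident` service in a sync rule.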
- -The following sections provide examples of advanced sync rules for this connector. - -[discrete#es-connectors-servicenow-sync-rules-number-incident-service] -*Indexing document based on incident number for Incident service* - -[source,js] ----- -[ - { - "service": "Incident", - "query": "numberSTARTSWITHINC001" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-servicenow-sync-rules-active-false-user-service] -*Indexing document based on user activity state for User service* - -[source,js] ----- -[ - { - "service": "User", - "query": "active=False" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-servicenow-sync-rules-author-administrator-knowledge-service] -*Indexing document based on author name for Knowledge service* - -[source,js] ----- -[ - { - "service": "Knowledge", - "query": "author.nameSTARTSWITHSystem Administrator" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-servicenow-known-issues] -===== Known issues - -There are no known issues for this connector. -Refer to <> for a list of known issues that impact all connectors. - -[discrete#es-connectors-servicenow-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-servicenow-security] -===== Security - -See <>. - -[discrete#es-connectors-servicenow-content-extraction] -===== Content extraction - -See <>. - - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-servicenow-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-servicenow-client-availability-prerequisites] -===== Availability and prerequisites - -The ServiceNow connector was introduced in Elastic version 8.9.0. -This connector is available as a self-managed *self-managed connector*. -To use this connector as a self-managed connector, satisfy all <>. - -[discrete#es-connectors-servicenow-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-servicenow-client-usage] -===== Usage -To use this connector as a *self-managed connector*, use the *Customized connector* workflow. - -For additional operations, see <>. - -[discrete#es-connectors-servicenow-client-compatibility] -===== Compatibility - -The ServiceNow connector is compatible with the following versions of ServiceNow: - -* ServiceNow "Tokyo" -* ServiceNow "San Diego" -* ServiceNow "Rome" -* ServiceNow "Utah" -* ServiceNow "Vancouver" -* ServiceNow "Washington" -* ServiceNow "Xanadu" - -[discrete#es-connectors-servicenow-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/servicenow.py[connector source code^]. -These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required to set up the connector: - -`url`:: -The host url of the ServiceNow instance. - -`username`:: -The username of the account for ServiceNow. - -`password`:: -The password of the account used for ServiceNow. 
- -`services`:: -Comma-separated list of services to fetch data from ServiceNow. If the value is `*`, the connector will fetch data from the list of basic services provided by ServiceNow: -- link:https://docs.servicenow.com/bundle/utah-platform-administration/page/administer/roles/concept/user.html[User] -- link:https://docs.servicenow.com/bundle/tokyo-it-service-management/page/product/incident-management/concept/c_IncidentManagement.html[Incident] -- link:https://docs.servicenow.com/bundle/tokyo-servicenow-platform/page/use/service-catalog-requests/task/t_AddNewRequestItems.html[Requested Item] -- link:https://docs.servicenow.com/bundle/tokyo-customer-service-management/page/product/customer-service-management/task/t_SearchTheKnowledgeBase.html[Knowledge] -- link:https://docs.servicenow.com/bundle/tokyo-it-service-management/page/product/change-management/task/t_CreateAChange.html[Change request] -+ -[NOTE] -==== -If you have configured a custom service, the `*` value will not fetch data from the basic services above by default. In this case you'll need to mention these service names explicitly. -==== -Default value is `*`. Examples: -+ - - `User, Incident, Requested Item, Knowledge, Change request` - - `*` - -`retry_count`:: -The number of retry attempts after a failed request to ServiceNow. Default value is `3`. - -`concurrent_downloads`:: -The number of concurrent downloads for fetching the attachment content. This speeds up the content extraction of attachments. Defaults to `10`. - -`use_text_extraction_service`:: -Requires a separate deployment of the <>. -Requires that ingest pipeline settings disable text extraction. -Default value is `False`. - -`use_document_level_security`:: -Restrict access to documents based on a user's permissions. -Refer to <> for more details. - -[discrete#es-connectors-servicenow-client-documents-syncs] -===== Documents and syncs - -All services and records the user has access to will be indexed according to the configurations provided. -The connector syncs the following ServiceNow object types: - -* Records -* Attachments - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. Use the <> to handle larger binary files. -* Permissions are not synced by default. You must enable <>. Otherwise, **all documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. -==== - -[discrete#es-connectors-servicenow-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-servicenow-client-dls] -===== Document level security - -<> ensures identities and permissions set in ServiceNow are maintained in Elasticsearch. -This enables you to restrict and personalize read-access users and groups have to documents in this index. -Access control syncs ensure this metadata is kept up to date in your Elasticsearch documents. - -The ServiceNow connector supports roles for access control lists (ACLs) to enable document level security in {es}. -For default services, connectors use the following roles to find users who have access to documents. 
- -|=== -| Service | Roles - -| User | `admin` - -| Incident | `admin`, `sn_incident_read`, `ml_report_user`, `ml_admin`, `itil` - -| Requested Item | `admin`, `sn_request_read`, `asset`, `atf_test_designer`, `atf_test_admin` - -| Knowledge | `admin`, `knowledge`, `knowledge_manager`, `knowledge_admin` - -| Change request | `admin`, `sn_change_read`, `itil` -|=== - -For services other than these defaults, the connector iterates over access controls with `read` operations and finds the respective roles for those services. - -[NOTE] -==== -The ServiceNow connector does not support scripted and conditional permissions. -==== - -[discrete#es-connectors-servicenow-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-servicenow-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-servicenow-client-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -The following sections provide examples of advanced sync rules for this connector. - -[discrete#es-connectors-servicenow-client-sync-rules-number-incident-service] -*Indexing document based on incident number for Incident service* - -[source,js] ----- -[ - { - "service": "Incident", - "query": "numberSTARTSWITHINC001" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-servicenow-client-sync-rules-active-false-user-service] -*Indexing document based on user activity state for User service* - -[source,js] ----- -[ - { - "service": "User", - "query": "active=False" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-servicenow-client-sync-rules-author-administrator-knowledge-service] -*Indexing document based on author name for Knowledge service* - -[source,js] ----- -[ - { - "service": "Knowledge", - "query": "author.nameSTARTSWITHSystem Administrator" - } -] ----- -// NOTCONSOLE - -[discrete#es-connectors-servicenow-client-connector-client-operations-testing] -===== End-to-end Testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the ServiceNow connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=servicenow ----- - -Generate performance reports using the following flag: `PERF8=yes`. -Toggle test data set size between SMALL, MEDIUM and LARGE with the argument `DATA_SIZE=`. -By default, it is set to `MEDIUM`. - -Users do not need to have a running Elasticsearch instance or a ServiceNow source to run this test. -Docker Compose manages the complete setup of the development environment. - -[discrete#es-connectors-servicenow-client-known-issues] -===== Known issues - -There are no known issues for this connector. -Refer to <> for a list of known issues that impact all connectors. - -[discrete#es-connectors-servicenow-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-servicenow-client-security] -===== Security - -See <>. - -[discrete#es-connectors-servicenow-client-content-extraction] -===== Content extraction - -See <>. 
- - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-sharepoint-online.asciidoc b/docs/reference/connector/docs/connectors-sharepoint-online.asciidoc deleted file mode 100644 index d09e089f194ad..0000000000000 --- a/docs/reference/connector/docs/connectors-sharepoint-online.asciidoc +++ /dev/null @@ -1,1146 +0,0 @@ -[#es-connectors-sharepoint-online] -=== Elastic SharePoint Online connector reference -++++ -SharePoint Online -++++ -// Attributes used in this file -:service-name: SharePoint Online -:service-name-stub: sharepoint_online - -[TIP] -==== -Looking for the SharePoint *Server* connector? See <>. -==== - -The _Elastic SharePoint Online connector_ is a <> for https://www.microsoft.com/en-ww/microsoft-365/sharepoint/[Microsoft SharePoint Online^]. - -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -.Choose your connector reference -******************************* -Are you using a managed connector on Elastic Cloud or a self-managed connector? Expand the documentation based on your deployment method. -******************************* - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-sharepoint-online-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-sharepoint-online-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a *managed connector* in Elastic versions *8.9.0 and later*. -To use this connector natively in Elastic Cloud, satisfy all <>. - -[NOTE] -==== -This connector requires a subscription. -View the requirements for this feature under the *Elastic Search* section of the https://www.elastic.co/subscriptions[Elastic Stack subscriptions^] page. -==== - -[discrete#es-connectors-sharepoint-online-usage] -===== Usage - -To use this connector as a *managed connector*, see <>. - -For additional operations, see <>. - -[discrete#es-connectors-sharepoint-online-sharepoint-prerequisites] -===== SharePoint prerequisites - -[discrete#es-connectors-sharepoint-online-oauth-app-create] -====== Create SharePoint OAuth app - -Before you can configure the connector, you must create an **OAuth App** in the SharePoint Online platform. -Your connector will authenticate to SharePoint as the registered OAuth application/client. -You'll collect values (`client ID`, `tenant ID`, and `client secret`) during this process that you'll need for the <> in Kibana. - -To get started, first log in to SharePoint Online and access your administrative dashboard. -Ensure you are logged in as the Azure Portal **service account**. - -Follow these steps: - -* Sign in to https://portal.azure.com/ and click on **Azure Active Directory**. -* Locate **App Registrations** and Click **New Registration**. -* Give your app a name - like "Search". -* Leave the *Redirect URIs* blank for now. -* *Register* the application. -* Find and keep the **Application (client) ID** and **Directory (tenant) ID** handy. -* Create a certificate and private key. 
This can, for example, be done by running `openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout azure_app.key -out azure_app.crt` command. Store both in a safe and secure place -* Locate the **Certificates** by navigating to **Client credentials: Certificates & Secrets**. -* Select **Upload certificate** -* Upload the certificate created in one of previous steps: `azure_app.crt` -* Set up the permissions the OAuth App will request from the Azure Portal service account. -** Navigate to **API Permissions** and click **Add Permission**. -** Add **application permissions** until the list looks like the following: -+ -``` -Graph API -- Sites.Selected -- Files.Read.All -- Group.Read.All -- User.Read.All - -Sharepoint -- Sites.Selected -``` -NOTE: If the `Comma-separated list of sites` configuration is set to `*` or if a user enables the toggle button `Enumerate all sites`, the connector requires `Sites.Read.All` permission. - -* **Grant admin consent**, using the `Grant Admin Consent` link from the permissions screen. -* Save the tenant name (i.e. Domain name) of Azure platform. - -[WARNING] -==== -The connector requires application permissions. It does not support delegated permissions (scopes). -==== - -[NOTE] -==== -The connector uses the https://learn.microsoft.com/en-us/sharepoint/dev/apis/sharepoint-rest-graph[Graph API^] (stable https://learn.microsoft.com/en-us/graph/api/overview?view=graph-rest-1.0#other-api-versions[v1.0 API^]) where possible to fetch data from Sharepoint Online. -When entities are not available via the Graph API the connector falls back to using the Sharepoint https://learn.microsoft.com/en-us/sharepoint/dev/sp-add-ins/get-to-know-the-sharepoint-rest-service[REST API^]. -==== - -[discrete#es-connectors-sharepoint-online-oauth-app-permissions] -====== SharePoint permissions - -Microsoft is https://learn.microsoft.com/en-us/sharepoint/dev/sp-add-ins/retirement-announcement-for-azure-acs[retiring Azure Access Control Service (ACS)]. This affects permission configuration: - -* *Tenants created after November 1st, 2024*: Certificate authentication is required -* *Tenants created before November 1st, 2024*: Secret-based authentication must be migrated to certificate authentication by April 2nd, 2026 - -[discrete#es-connectors-sharepoint-online-oauth-app-certificate-auth] -===== Certificate Authentication - -This authentication method does not require additional setup other than creating and uploading certificates to the OAuth App. - -[discrete#es-connectors-sharepoint-online-oauth-app-secret-auth] -===== Secret Authentication - -[IMPORTANT] -==== -This method is only applicable to tenants created before November 1st, 2024. This method will be fully retired as of April 2nd, 2026. -==== - -Refer to the following documentation for setting https://learn.microsoft.com/en-us/sharepoint/dev/solution-guidance/security-apponly-azureacs[SharePoint permissions^]. - -* To set `DisableCustomAppAuthentication` to false, connect to SharePoint using PowerShell and run `set-spotenant -DisableCustomAppAuthentication $false` -* To assign full permissions to the tenant in SharePoint Online, go to the tenant URL in your browser. -The URL follows this pattern: `https:///_layouts/15/appinv.aspx`. -This loads the SharePoint admin center page. -** In the *App ID* box, enter the application ID that you recorded earlier, and then click *Lookup*. -The application name will appear in the Title box. 
-** In the *App Domain* box, type .onmicrosoft.com -** In the *App's Permission Request XML* box, type the following XML string: -+ -[source, xml] ----- - - - - ----- - -[discrete#es-connectors-sharepoint-online-sites-selected-permissions] -====== Granting `Sites.Selected` permissions - -To configure `Sites.Selected` permissions, follow these steps in the Azure Active Directory portal. These permissions enable precise access control to specific SharePoint sites. - -. Sign in to the https://portal.azure.com/[Azure Active Directory portal^]. -. Navigate to **App registrations** and locate the application created for the connector. -. Under **API permissions**, click **Add permission**. -. Select **Microsoft Graph** > **Application permissions**, then add `Sites.Selected`. -. Click **Grant admin consent** to approve the permission. - -[TIP] -==== -Refer to the official https://learn.microsoft.com/en-us/graph/permissions-reference[Microsoft documentation] for managing permissions in Azure AD. -==== - -To assign access to specific SharePoint sites using `Sites.Selected`: - -. Use Microsoft Graph Explorer or PowerShell to grant access. -. To fetch the site ID, run the following Graph API query: -+ -[source, http] ----- -GET https://graph.microsoft.com/v1.0/sites?select=webUrl,Title,Id&$search="*" ----- -+ -This will return the `id` of the site. - -. Use the `id` to assign read or write access: -+ -[source, http] ----- -POST https://graph.microsoft.com/v1.0/sites//permissions -{ - "roles": ["read"], // or "write" - "grantedToIdentities": [ - { - "application": { - "id": "", - "displayName": "" - } - } - ] -} ----- - -[NOTE] -==== -When using the `Comma-separated list of sites` configuration field, ensure the sites specified match those granted `Sites.Selected` permission in SharePoint. -If the `Comma-separated list of sites` field is set to `*` or the `Enumerate all sites` toggle is enabled, the connector will attempt to access all sites. This requires broader permissions, which are not supported with `Sites.Selected`. -==== - -.Graph API permissions -**** -Microsoft recommends using Graph API for all operations with Sharepoint Online. Graph API is well-documented and more efficient at fetching data, which helps avoid throttling. -Refer to https://learn.microsoft.com/en-us/sharepoint/dev/general-development/how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online[Microsoft's throttling policies^] for more information. - -Here's a summary of why we use these Graph API permissions: - -* *Sites.Selected* is used to fetch the sites and their metadata -* *Files.Read.All* is used to fetch Site Drives and files in these drives -* *Groups.Read.All* is used to fetch groups for document-level permissions -* *User.Read.All* is used to fetch user information for document-level permissions - -Due to the way the Graph API is designed, these permissions are "all or nothing" - it's currently impossible to limit access to these resources. -**** - -[discrete#es-connectors-sharepoint-online-compatability] -===== Compatibility - -This connector is compatible with SharePoint Online. - -[discrete#es-connectors-sharepoint-online-configuration] -===== Configuration - -Use the following configuration fields to set up the connector: - -Tenant ID:: -The tenant id for the Azure account hosting the Sharepoint Online instance. - -Tenant Name:: -The tenant name for the Azure account hosting the Sharepoint Online instance. - -Client ID:: -The client id to authenticate with SharePoint Online. 
- -Authentication Method:: -Authentication method to use to connector to Sharepoint Online and Rest APIs. `secret` is deprecated and `certificate` is recommended. - -Secret value:: -The secret value to authenticate with SharePoint Online, if Authentication Method: `secret` is chosen. - -Content of certificate file:: -Content of certificate file if Authentication Method: `certificate` is chosen. - -Content of private key file:: -Content of private key file if Authentication Method: `certificate` is chosen. - -Comma-separated list of sites:: -List of site collection names or paths to fetch from SharePoint. -When enumerating all sites, these values should be the _names_ of the sites. -Use `*` to include all available sites. -Examples: -* `collection1` -* `collection1,sub-collection` -* `*` -+ -When **not** enumerating all sites, these values should be the _paths_ (URL after `/sites/`) of the sites. -Examples: -* `collection1` -* `collection1,collection1/sub-collection` - -Enumerate all sites?:: -If enabled, the full list of all sites will be fetched from the API, in bulk, and will be filtered down to match the configured list of site names. -If disabled, each path in the configured list of site paths will be fetched individually from the API. -When disabled, `*` is not a valid configuration for `Comma-separated list of sites`. -Enabling this configuration is most useful when syncing large numbers (more than total/200) of sites. -This is because, at high volumes, it is more efficient to fetch sites in bulk. -When syncing fewer sites, disabling this configuration can result in improved performance. -This is because, at low volumes, it is more efficient to only fetch the sites that you need. - -Fetch sub-sites of configured sites?:: -Whether sub-sites of the configured site(s) should be automatically fetched. -This option is only available when not enumerating all sites (see above). - -Enable document level security:: -Toggle to enable <>. -When enabled, full and incremental syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -Access control syncs will fetch users' access control lists and store them in a separate index. -+ -Once enabled, the following granular permissions toggles will be available: - -** *Fetch drive item permissions*: Enable this option to fetch *drive item* specific permissions. -** *Fetch unique page permissions*: Enable this option to fetch unique *page* permissions. If this setting is disabled a page will inherit permissions from its parent site. -** *Fetch unique list permissions*: Enable this option to fetch unique *list* permissions. If this setting is disabled a list will inherit permissions from its parent site. -** *Fetch unique list item permissions*: Enable this option to fetch unique *list item* permissions. If this setting is disabled a list item will inherit permissions from its parent site. -+ -[NOTE] -==== -If left empty the default value `true` will be used for these granular permissions toggles. -Note that these settings may increase sync times. -==== - -[discrete#es-connectors-sharepoint-online-documents-syncs] -===== Documents and syncs - -The connector syncs the following SharePoint object types: - -* *Sites* (and subsites) -* *Lists* -* *List items* and *attachment content* -* *Document libraries* and *attachment content* (including web pages) - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) 
-* Permissions are not synced by default. Enable <> to sync permissions. -==== - -[TIP] -==== -*Making Sharepoint Site Pages Web Part content searchable* - -If you're using Web Parts on Sharepoint Site Pages and want to make this content searchable, you'll need to consult the https://learn.microsoft.com/en-us/sharepoint/dev/spfx/web-parts/guidance/integrate-web-part-properties-with-sharepoint#specify-web-part-property-value-type/[official documentation^]. - -We recommend setting `isHtmlString` to *True* for all Web Parts that need to be searchable. -==== - -[discrete#es-connectors-sharepoint-online-documents-syncs-limitations] -====== Limitations - -* The connector does not currently sync content from Teams-connected sites. - -[discrete#es-connectors-sharepoint-online-sync-rules] -===== Sync rules - -_Basic_ sync rules are identical for all connectors and are available by default. -For more information read <>. - -[discrete#es-connectors-sharepoint-online-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -The following section describes *advanced sync rules* for this connector. -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -<> for the Sharepoint Online connector enable you to avoid extracting and syncing older data that might no longer be relevant for search. - -Example: - -[source,js] ----- -{ - "skipExtractingDriveItemsOlderThan": 60 -} ----- -// NOTCONSOLE - -This rule will not extract content of any drive items (files in document libraries) that haven't been modified for 60 days or more. - -[discrete#es-connectors-sharepoint-online-sync-rules-limitations] -*Limitations of sync rules with incremental syncs* - -Changing sync rules after Sharepoint Online content has already been indexed can bring unexpected results, when using <>. - -Incremental syncs ensure _updates_ from 3rd-party system, but do not modify existing documents in the index. - -*To avoid these issues, run a full sync after changing sync rules (basic or advanced).* - -Let's take a look at several examples where incremental syncs might lead to inconsistent data on your index. - -[discrete#es-connectors-sharepoint-online-sync-rules-limitations-restrictive-added] -*Example: Restrictive basic sync rule added after a full sync* - -Imagine your Sharepoint Online drive contains the following drive items: - -[source,txt] ----- -/Documents/Report.doc -/Documents/Spreadsheet.xls -/Presentations/Q4-2020-Report.pdf -/Presentations/Q4-2020-Report-Data.xls -/Personal/Documents/Sales.xls ----- - -After a sync, all these drive items will be stored on your Elasticsearch index. -Let's add a basic sync rule, filtering files by their path: - -[source,txt] ----- -Exclude WHERE path CONTAINS "Documents" ----- - -These filtering rules will exclude all files with "Documents" in their path, leaving only files in `/Presentations` directory: - -[source,txt] ----- -/Presentations/Q4-2020-Report.pdf -/Presentations/Q4-2020-Report-Data.xls ----- - -If no files were changed, incremental sync will not receive information about changes from Sharepoint Online and won't be able to delete any files, leaving the index in the same state it was before the sync. - -After a *full sync*, the index will be updated and files that are excluded by sync rules will be removed. 
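-
-If you want to confirm the cleanup, one option is to compare the number of drive items held in the index before and after the full sync. The query below is only an illustrative sketch, not part of the connector: it assumes your index is named `INDEX_NAME` and reuses the `object_type` field that also appears in the cleanup query later in this section.
-
-[source, console]
-----
-GET INDEX_NAME/_count
-{
-  "query": {
-    "match": {
-      "object_type": "drive_item"
-    }
-  }
-}
-----
-// TEST[skip:TODO]
-
-A lower count after the full sync indicates that the files excluded by the new rule were removed as expected.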
- -[discrete#es-connectors-sharepoint-online-sync-rules-limitations-restrictive-removed] -*Example: Restrictive basic sync rules removed after a full sync* - -Imagine that Sharepoint Online drive has the following drive items: - -[source,txt] ----- -/Documents/Report.doc -/Documents/Spreadsheet.xls -/Presentations/Q4-2020-Report.pdf -/Presentations/Q4-2020-Report-Data.xls -/Personal/Documents/Sales.xls ----- - -Before doing a sync, we add a restrictive basic filtering rule: - -[source,txt] ----- -Exclude WHERE path CONTAINS "Documents" ----- - -After a full sync, the index will contain only files in the `/Presentations` directory: - -[source,txt] ----- -/Presentations/Q4-2020-Report.pdf -/Presentations/Q4-2020-Report-Data.xls ----- - -Afterwards, we can remove the filtering rule and run an incremental sync. If no changes happened to the files, incremental sync will not mirror these changes in the Elasticsearch index, because Sharepoint Online will not report any changes to the items. -Only a *full sync* will include the items previously ignored by the sync rule. - -[discrete#es-connectors-sharepoint-online-sync-rules-limitations-restrictive-changed] -*Example: Advanced sync rules edge case* - -Advanced sync rules can be applied to limit which documents will have content extracted. -For example, it's possible to set a rule so that documents older than 180 days won't have content extracted. - -However, there is an edge case. -Imagine a document that is 179 days old and its content is extracted and indexed into Elasticsearch. -After 2 days, this document will be 181 days old. -Since this document was already ingested it will not be modified. -Therefore, the content will not be removed from the index, following an incremental sync. - -In this situation, if you want older documents to be removed, you will need to clean the index up manually. -For example, you can manually run an Elasticsearch query that removes drive item content older than 180 days: - -[source, console] ----- -POST INDEX_NAME/_update_by_query?conflicts=proceed -{ - "query": { - "bool": { - "filter": [ - { - "match": { - "object_type": "drive_item" - } - }, - { - "exists": { - "field": "file" - } - }, - { - "range": { - "lastModifiedDateTime": { - "lte": "now-180d" - } - } - } - ] - } - }, - "script": { - "source": "ctx._source.body = ''", - "lang": "painless" - } -} ----- -// TEST[skip:TODO] - -[discrete#es-connectors-sharepoint-online-dls] -===== Document-level security - -Document-level security (DLS) enables you to restrict access to documents based on a user's permissions. -This feature is available by default for this connector. - -Refer to <> on this page for how to enable DLS for this connector. - -[TIP] -==== -Refer to <> to learn how to ingest data from SharePoint Online with DLS enabled, when building a search application. -==== - -[discrete#es-connectors-sharepoint-online-content-extraction] -===== Content extraction - -[discrete#es-connectors-sharepoint-online-content-extraction-pipeline] -====== Default content extraction - -The default content extraction service is powered by the Enterprise Search default ingest pipeline. -(See {ref}/ingest-pipeline-search.html[Ingest pipelines for Search indices].) - -See <>. - -[discrete#es-connectors-sharepoint-online-content-extraction-local] -====== Local content extraction (for large files) - -The SharePoint Online self-managed connector supports large file content extraction (> *100MB*). -This requires: - -* A self-managed deployment of the Elastic Text Extraction Service. 
-* Text extraction to be _disabled_ in the default ingest pipeline settings. - -Refer to <> for more information. - -[discrete#es-connectors-sharepoint-online-known-issues] -===== Known issues - -* *Documents failing to sync due to SharePoint file and folder limits* -+ -SharePoint has limits on the number of files and folders that can be synced. -You might encounter an error like the following written to the body of documents that failed to sync: -`The file size exceeds the allowed limit. CorrelationId: fdb36977-7cb8-4739-992f-49878ada6686, UTC DateTime: 4/21/2022 11:24:22 PM` -+ -Refer to https://support.microsoft.com/en-us/office/download-files-and-folders-from-onedrive-or-sharepoint-5c7397b7-19c7-4893-84fe-d02e8fa5df05#:~:text=Downloads%20are%20subject%20to%20the,zip%20file%20and%2020GB%20overall[SharePoint documentation^] for more information about these limits. -+ -** *Syncing a large number of files* -+ -The connector will fail to download files from folders that contain more than 5000 files. -The List View Threshold (default 5000) is a limit that prevents operations with a high performance impact on the SharePoint Online environment. -+ -*Workaround:* Reduce batch size to avoid this issue. -+ -** *Syncing large files* -+ -SharePoint has file size limits, but these are configurable. -+ -*Workaround:* Increase the file size limit. -Refer to https://learn.microsoft.com/en-us/sharepoint/manage-site-collection-storage-limits#set-automatic-or-manual-site-storage-limits[SharePoint documentation^] for more information. -+ -** *Deleted documents counter is not updated during incremental syncs* -+ -If the configuration `Enumerate All Sites?` is enabled, incremental syncs may not behave as expected. -Drive Item documents that were deleted between incremental syncs may not be detected as deleted. -+ -*Workaround*: Disable `Enumerate All Sites?`, and configure full site paths for all desired sites. - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-sharepoint-online-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-sharepoint-onlinesecurity] -===== Security - -See <>. - -// Closing the collapsible section -=============== - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-sharepoint-online-connector-client-reference] -==== *Self-managed connector* - -.View *self-managed connector* reference - -[%collapsible] -=============== - -[discrete#es-connectors-sharepoint-online-client-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. -To use this connector as a self-managed connector, satisfy all <>. - -[NOTE] -==== -This connector requires a subscription. -View the requirements for this feature under the *Elastic Search* section of the https://www.elastic.co/subscriptions[Elastic Stack subscriptions^] page. -==== - -[discrete#es-connectors-sharepoint-online-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, see <> -For additional operations, see <>. - -[discrete#es-connectors-sharepoint-online-client-sharepoint-prerequisites] -===== SharePoint prerequisites - -[discrete#es-connectors-sharepoint-online-client-oauth-app-create] -====== Create SharePoint OAuth app - -Before you can configure the connector, you must create an **OAuth App** in the SharePoint Online platform. 
-Your connector will authenticate to SharePoint as the registered OAuth application/client. -You'll collect values (`client ID`, `tenant ID`, and `client secret`) during this process that you'll need for the <> in Kibana. - -To get started, first log in to SharePoint Online and access your administrative dashboard. -Ensure you are logged in as the Azure Portal **service account**. - -Follow these steps: - -* Sign in to https://portal.azure.com/ and click on **Azure Active Directory**. -* Locate **App Registrations** and Click **New Registration**. -* Give your app a name - like "Search". -* Leave the *Redirect URIs* blank for now. -* *Register* the application. -* Find and keep the **Application (client) ID** and **Directory (tenant) ID** handy. -* Create a certificate and private key. This can, for example, be done by running `openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout azure_app.key -out azure_app.crt` command. Store both in a safe and secure place -* Locate the **Certificates** by navigating to **Client credentials: Certificates & Secrets**. -* Select **Upload certificate** -* Upload the certificate created in one of previous steps: `azure_app.crt` -* Set up the permissions the OAuth App will request from the Azure Portal service account. -** Navigate to **API Permissions** and click **Add Permission**. -** Add **application permissions** until the list looks like the following: -+ -``` -Graph API -- Sites.Selected -- Files.Read.All -- Group.Read.All -- User.Read.All - -Sharepoint -- Sites.Selected -``` -NOTE: If the `Comma-separated list of sites` configuration is set to `*` or if a user enables the toggle button `Enumerate all sites`, the connector requires `Sites.Read.All` permission. - -* **Grant admin consent**, using the `Grant Admin Consent` link from the permissions screen. -* Save the tenant name (i.e. Domain name) of Azure platform. - -[WARNING] -==== -The connector requires application permissions. It does not support delegated permissions (scopes). -==== - -[NOTE] -==== -The connector uses the https://learn.microsoft.com/en-us/sharepoint/dev/apis/sharepoint-rest-graph[Graph API^] (stable https://learn.microsoft.com/en-us/graph/api/overview?view=graph-rest-1.0#other-api-versions[v1.0 API^]) where possible to fetch data from Sharepoint Online. -When entities are not available via the Graph API the connector falls back to using the Sharepoint https://learn.microsoft.com/en-us/sharepoint/dev/sp-add-ins/get-to-know-the-sharepoint-rest-service[REST API^]. -==== - -[discrete#es-connectors-sharepoint-online-client-oauth-app-permissions] -====== SharePoint permissions - -Microsoft is https://learn.microsoft.com/en-us/sharepoint/dev/sp-add-ins/retirement-announcement-for-azure-acs[retiring Azure Access Control Service (ACS)]. This affects permission configuration: -* *Tenants created after November 1st, 2024*: Certificate authentication is required -* *Tenants created before November 1st, 2024*: Secret-based authentication must be migrated to certificate authentication by April 2nd, 2026 - -[discrete#es-connectors-sharepoint-online-client-oauth-app-certificate-auth] -===== Certificate Authentication - -This authentication method does not require additional setup other than creating and uploading certificates to the OAuth App. - -[discrete#es-connectors-sharepoint-online-client-oauth-app-secret-auth] -===== Secret Authentication - -[IMPORTANT] -==== -This method is only applicable to tenants created before November 1st, 2024. 
This method will be fully retired as of April 2nd, 2026. -==== - -Refer to the following documentation for setting https://learn.microsoft.com/en-us/sharepoint/dev/solution-guidance/security-apponly-azureacs[SharePoint permissions^]. - -* To set `DisableCustomAppAuthentication` to false, connect to SharePoint using PowerShell and run `set-spotenant -DisableCustomAppAuthentication $false` -* To assign full permissions to the tenant in SharePoint Online, go to the tenant URL in your browser. -The URL follows this pattern: `https:///_layouts/15/appinv.aspx`. -This loads the SharePoint admin center page. -** In the *App ID* box, enter the application ID that you recorded earlier, and then click *Lookup*. -The application name will appear in the Title box. -** In the *App Domain* box, type .onmicrosoft.com -** In the *App's Permission Request XML* box, type the following XML string: -+ -[source, xml] ----- - - - - ----- - -[discrete#es-connectors-sharepoint-online-sites-selected-permissions-self-managed] -====== Granting `Sites.Selected` permissions - -To configure `Sites.Selected` permissions, follow these steps in the Azure Active Directory portal. These permissions enable precise access control to specific SharePoint sites. - -. Sign in to the https://portal.azure.com/[Azure Active Directory portal^]. -. Navigate to **App registrations** and locate the application created for the connector. -. Under **API permissions**, click **Add permission**. -. Select **Microsoft Graph** > **Application permissions**, then add `Sites.Selected`. -. Click **Grant admin consent** to approve the permission. - -[TIP] -==== -Refer to the official https://learn.microsoft.com/en-us/graph/permissions-reference[Microsoft documentation] for managing permissions in Azure AD. -==== - - -To assign access to specific SharePoint sites using `Sites.Selected`: - -. Use Microsoft Graph Explorer or PowerShell to grant access. -. To fetch the site ID, run the following Graph API query: -+ -[source, http] ----- -GET https://graph.microsoft.com/v1.0/sites?select=webUrl,Title,Id&$search="*" ----- -+ -This will return the `id` of the site. - -. Use the `id` to assign read or write access: -+ -[source, http] ----- -POST https://graph.microsoft.com/v1.0/sites//permissions -{ - "roles": ["read"], // or "write" - "grantedToIdentities": [ - { - "application": { - "id": "", - "displayName": "" - } - } - ] -} ----- - -[NOTE] -==== -When using the `Comma-separated list of sites` configuration field, ensure the sites specified match those granted `Sites.Selected` permission in SharePoint. -If the `Comma-separated list of sites` field is set to `*` or the `Enumerate all sites` toggle is enabled, the connector will attempt to access all sites. This requires broader permissions, which are not supported with `Sites.Selected`. -==== - -.Graph API permissions -**** -Microsoft recommends using Graph API for all operations with Sharepoint Online. Graph API is well-documented and more efficient at fetching data, which helps avoid throttling. -Refer to https://learn.microsoft.com/en-us/sharepoint/dev/general-development/how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online[Microsoft's throttling policies^] for more information. 
- -Here's a summary of why we use these Graph API permissions: - -* *Sites.Selected* is used to fetch the sites and their metadata -* *Files.Read.All* is used to fetch Site Drives and files in these drives -* *Groups.Read.All* is used to fetch groups for document-level permissions -* *User.Read.All* is used to fetch user information for document-level permissions - -Due to the way the Graph API is designed, these permissions are "all or nothing" - it's currently impossible to limit access to these resources. -**** - -[discrete#es-connectors-sharepoint-online-client-compatability] -===== Compatibility - -This connector is compatible with SharePoint Online. - -[discrete#es-connectors-sharepoint-online-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/sharepoint_online.py[connector source code^]. -These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -Use the following configuration fields to set up the connector: - -`tenant_id`:: -The tenant id for the Azure account hosting the Sharepoint Online instance. - -`tenant_name`:: -The tenant name for the Azure account hosting the Sharepoint Online instance. - -`client_id`:: -The client id to authenticate with SharePoint Online. - -`auth_method`:: -Authentication method to use to connector to Sharepoint Online and Rest APIs. `secret` is deprecated and `certificate` is recommended. - -`secret_value`:: -The secret value to authenticate with SharePoint Online, if auth_method: `secret` is chosen. - -`certificate`:: -Content of certificate file if auth_method: `certificate` is chosen. - -`private_key`:: -Content of private key file if auth_method: `certificate` is chosen. - -`site_collections`:: -List of site collection names or paths to fetch from SharePoint. -When enumerating all sites, these values should be the _names_ of the sites. -Use `*` to include all available sites. -Examples: -* `collection1` -* `collection1,sub-collection` -* `*` -+ -When **not** enumerating all sites, these values should be the _paths_ (URL after `/sites/`) of the sites. -Examples: -* `collection1` -* `collection1,collection1/sub-collection` - -`enumerate_all_sites`:: -If enabled, the full list of all sites will be fetched from the API, in bulk, and will be filtered down to match the configured list of site names. -If disabled, each path in the configured list of site paths will be fetched individually from the API. -Enabling this configuration is most useful when syncing large numbers (more than total/200) of sites. -This is because, at high volumes, it is more efficient to fetch sites in bulk. -When syncing fewer sites, disabling this configuration can result in improved performance. -This is because, at low volumes, it is more efficient to only fetch the sites that you need. -+ -[NOTE] -==== -When disabled, `*` is not a valid configuration for `Comma-separated list of sites`. -==== - -`fetch_subsites`:: -Whether sub-sites of the configured site(s) should be automatically fetched. -This option is only available when not enumerating all sites (see above). - -`use_text_extraction_service`:: -Toggle to enable local text extraction service for documents. -Requires a separate deployment of the <>. 
-Requires that ingest pipeline settings disable text extraction. -Default value is `False`. - -`use_document_level_security`:: -Toggle to enable <>. -When enabled, full and incremental syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. -Access control syncs will fetch users' access control lists and store them in a separate index. -+ -Once enabled, the following granular permissions toggles will be available: - -** *Fetch drive item permissions*: Enable this option to fetch *drive item* specific permissions. -** *Fetch unique page permissions*: Enable this option to fetch unique *page* permissions. If this setting is disabled a page will inherit permissions from its parent site. -** *Fetch unique list permissions*: Enable this option to fetch unique *list* permissions. If this setting is disabled a list will inherit permissions from its parent site. -** *Fetch unique list item permissions*: Enable this option to fetch unique *list item* permissions. If this setting is disabled a list item will inherit permissions from its parent site. -+ -[NOTE] -==== -If left empty the default value `true` will be used for these granular permissions toggles. -Note that these settings may increase sync times. -==== - -[discrete#es-connectors-sharepoint-online-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-sharepoint-online-client-documents-syncs] -===== Documents and syncs - -The connector syncs the following SharePoint object types: - -* *Sites* (and subsites) -* *Lists* -* *List items* and *attachment content* -* *Document libraries* and *attachment content* (including web pages) - -[TIP] -==== -*Making Sharepoint Site Pages Web Part content searchable* - -If you're using Web Parts on Sharepoint Site Pages and want to make this content searchable, you'll need to consult the https://learn.microsoft.com/en-us/sharepoint/dev/spfx/web-parts/guidance/integrate-web-part-properties-with-sharepoint#specify-web-part-property-value-type/[official documentation^]. - -We recommend setting `isHtmlString` to *True* for all Web Parts that need to be searchable. -==== - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted by default. Use the <> to handle larger binary files. -* Permissions are not synced by default. Enable <> to sync permissions. -==== - -[discrete#es-connectors-sharepoint-online-client-documents-syncs-limitations] -====== Limitations - -* The connector does not currently sync content from Teams-connected sites. - -[discrete#es-connectors-sharepoint-online-client-sync-rules] -===== Sync rules - -_Basic_ sync rules are identical for all connectors and are available by default. -For more information read <>. - -[discrete#es-connectors-sharepoint-online-client-sync-rules-advanced] -====== Advanced sync rules - -[NOTE] -==== -A <> is required for advanced sync rules to take effect. -==== - -The following section describes *advanced sync rules* for this connector. -Advanced sync rules are defined through a source-specific DSL JSON snippet. - -<> for the Sharepoint Online connector enable you to avoid extracting and syncing older data that might no longer be relevant for search. - -Example: - -[source,js] ----- -{ - "skipExtractingDriveItemsOlderThan": 60 -} - ----- -// NOTCONSOLE - -This rule will not extract content of any drive items (files in document libraries) that haven't been modified for 60 days or more. 
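-
-To gauge how many drive items such a rule would affect, you can count the documents that fall outside the 60-day window. The query below is only an illustrative sketch: it assumes your index is named `INDEX_NAME` and relies on the `object_type` and `lastModifiedDateTime` fields, which also appear in the cleanup query later in this section.
-
-[source, console]
-----
-GET INDEX_NAME/_count
-{
-  "query": {
-    "bool": {
-      "filter": [
-        { "match": { "object_type": "drive_item" } },
-        { "range": { "lastModifiedDateTime": { "lte": "now-60d" } } }
-      ]
-    }
-  }
-}
-----
-// TEST[skip:TODO]
-
-If this count is large, the rule can noticeably reduce the amount of content extraction work performed during a full sync.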
- -[discrete#es-connectors-sharepoint-online-client-sync-rules-limitations] -*Limitations of sync rules with incremental syncs* - -Changing sync rules after Sharepoint Online content has already been indexed can bring unexpected results, when using <>. - -Incremental syncs ensure _updates_ from 3rd-party system, but do not modify existing documents in the index. - -*To avoid these issues, run a full sync after changing sync rules (basic or advanced).* - -Let's take a look at several examples where incremental syncs might lead to inconsistent data on your index. - -[discrete#es-connectors-sharepoint-online-client-sync-rules-limitations-restrictive-added] -*Example: Restrictive basic sync rule added after a full sync* - -Imagine your Sharepoint Online drive contains the following drive items: - -[source,txt] ----- -/Documents/Report.doc -/Documents/Spreadsheet.xls -/Presentations/Q4-2020-Report.pdf -/Presentations/Q4-2020-Report-Data.xls -/Personal/Documents/Sales.xls ----- - -After a sync, all these drive items will be stored on your Elasticsearch index. -Let's add a basic sync rule, filtering files by their path: - -[source,txt] ----- -Exclude WHERE path CONTAINS "Documents" ----- - -These filtering rules will exclude all files with "Documents" in their path, leaving only files in `/Presentations` directory: - -[source,txt] ----- -/Presentations/Q4-2020-Report.pdf -/Presentations/Q4-2020-Report-Data.xls ----- - -If no files were changed, incremental sync will not receive information about changes from Sharepoint Online and won't be able to delete any files, leaving the index in the same state it was before the sync. - -After a *full sync*, the index will be updated and files that are excluded by sync rules will be removed. - -[discrete#es-connectors-sharepoint-online-client-sync-rules-limitations-restrictive-removed] -*Example: Restrictive basic sync rules removed after a full sync* - -Imagine that Sharepoint Online drive has the following drive items: - -[source,txt] ----- -/Documents/Report.doc -/Documents/Spreadsheet.xls -/Presentations/Q4-2020-Report.pdf -/Presentations/Q4-2020-Report-Data.xls -/Personal/Documents/Sales.xls ----- - -Before doing a sync, we add a restrictive basic filtering rule: - -[source,txt] ----- -Exclude WHERE path CONTAINS "Documents" ----- - -After a full sync, the index will contain only files in the `/Presentations` directory: - -[source,txt] ----- -/Presentations/Q4-2020-Report.pdf -/Presentations/Q4-2020-Report-Data.xls ----- - -Afterwards, we can remove the filtering rule and run an incremental sync. If no changes happened to the files, incremental sync will not mirror these changes in the Elasticsearch index, because Sharepoint Online will not report any changes to the items. -Only a *full sync* will include the items previously ignored by the sync rule. - -[discrete#es-connectors-sharepoint-online-client-sync-rules-limitations-restrictive-changed] -*Example: Advanced sync rules edge case* - -Advanced sync rules can be applied to limit which documents will have content extracted. -For example, it's possible to set a rule so that documents older than 180 days won't have content extracted. - -However, there is an edge case. -Imagine a document that is 179 days old and its content is extracted and indexed into Elasticsearch. -After 2 days, this document will be 181 days old. -Since this document was already ingested it will not be modified. -Therefore, the content will not be removed from the index, following an incremental sync. 
- -In this situation, if you want older documents to be removed, you will need to clean the index up manually. -For example, you can manually run an Elasticsearch query that removes drive item content older than 180 days: - -[source, console] ----- -POST INDEX_NAME/_update_by_query?conflicts=proceed -{ - "query": { - "bool": { - "filter": [ - { - "match": { - "object_type": "drive_item" - } - }, - { - "exists": { - "field": "file" - } - }, - { - "range": { - "lastModifiedDateTime": { - "lte": "now-180d" - } - } - } - ] - } - }, - "script": { - "source": "ctx._source.body = ''", - "lang": "painless" - } -} ----- -// TEST[skip:TODO] - -[discrete#es-connectors-sharepoint-online-client-dls] -===== Document-level security - -Document-level security (DLS) enables you to restrict access to documents based on a user's permissions. -This feature is available by default for this connector. - -Refer to <> on this page for how to enable DLS for this connector. - -[TIP] -==== -Refer to <> to learn how to ingest data from SharePoint Online with DLS enabled, when building a search application. -==== - -[discrete#es-connectors-sharepoint-online-client-content-extraction] -===== Content extraction - -[discrete#es-connectors-sharepoint-online-client-content-extraction-pipeline] -====== Default content extraction - -The default content extraction service is powered by the Enterprise Search default ingest pipeline. -(See {ref}/ingest-pipeline-search.html[Ingest pipelines for Search indices].) - -See <>. - -[discrete#es-connectors-sharepoint-online-client-content-extraction-local] -====== Local content extraction (for large files) - -The SharePoint Online self-managed connector supports large file content extraction (> *100MB*). -This requires: - -* A self-managed deployment of the Elastic Text Extraction Service. -* Text extraction to be _disabled_ in the default ingest pipeline settings. - -Refer to <> for more information. - -[discrete#es-connectors-sharepoint-online-client-testing] -===== End-to-end testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the SharePoint Online connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=sharepoint_online ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=sharepoint_online DATA_SIZE=small ----- - -[discrete#es-connectors-sharepoint-online-client-known-issues] -===== Known issues - -* *Documents failing to sync due to SharePoint file and folder limits* -+ -SharePoint has limits on the number of files and folders that can be synced. -You might encounter an error like the following written to the body of documents that failed to sync: -`The file size exceeds the allowed limit. CorrelationId: fdb36977-7cb8-4739-992f-49878ada6686, UTC DateTime: 4/21/2022 11:24:22 PM` -+ -Refer to https://support.microsoft.com/en-us/office/download-files-and-folders-from-onedrive-or-sharepoint-5c7397b7-19c7-4893-84fe-d02e8fa5df05#:~:text=Downloads%20are%20subject%20to%20the,zip%20file%20and%2020GB%20overall[SharePoint documentation^] for more information about these limits. -+ -** *Syncing a large number of files* -+ -The connector will fail to download files from folders that contain more than 5000 files. -The List View Threshold (default 5000) is a limit that prevents operations with a high performance impact on the SharePoint Online environment. -+ -*Workaround:* Reduce batch size to avoid this issue. 
-+ -** *Syncing large files* -+ -SharePoint has file size limits, but these are configurable. -+ -*Workaround:* Increase the file size limit. -Refer to https://learn.microsoft.com/en-us/sharepoint/manage-site-collection-storage-limits#set-automatic-or-manual-site-storage-limits[SharePoint documentation^] for more information. -+ -** *Deleted documents counter is not updated during incremental syncs* -+ -If the configuration `Enumerate All Sites?` is enabled, incremental syncs may not behave as expected. -Drive Item documents that were deleted between incremental syncs may not be detected as deleted. -+ -*Workaround*: Disable `Enumerate All Sites?`, and configure full site paths for all desired sites. - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-sharepoint-online-client-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-sharepoint-online-client-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-sharepoint.asciidoc b/docs/reference/connector/docs/connectors-sharepoint.asciidoc deleted file mode 100644 index d7a2307a9db80..0000000000000 --- a/docs/reference/connector/docs/connectors-sharepoint.asciidoc +++ /dev/null @@ -1,418 +0,0 @@ -[#es-connectors-sharepoint] -=== Elastic SharePoint Server connector reference -++++ -SharePoint Server -++++ -// Attributes used in this file -:service-name: SharePoint Server -:service-name-stub: sharepoint_server - -The _Elastic SharePoint Server connector_ is a <> for https://www.microsoft.com/en-ww/microsoft-365/sharepoint/[Microsoft SharePoint Server^]. - -This connector is written in Python using the open code {connectors-python}[Elastic connector framework^]. -View the {connectors-python}/connectors/sources/sharepoint_server.py[source code for this connector^]. - -[TIP] -==== -Looking for the SharePoint *Online* connector? See the <>. -==== - - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-sharepoint-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-sharepoint-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a managed service since Elastic *8.15.0*. -To use this connector, satisfy all <>. - -[NOTE] -==== -This connector is in *beta* and is subject to change. -Beta features are subject to change and are not covered by the support SLA of generally available (GA) features. -Elastic plans to promote this feature to GA in a future release. -==== - -[discrete#es-connectors-sharepoint-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-sharepoint-usage] -===== Usage - -See <>. - -For additional operations, see <>. - -[discrete#es-connectors-sharepoint-compatability] -===== Compatibility - -The following SharePoint Server versions are compatible: - -* SharePoint 2013 -* SharePoint 2016 -* SharePoint 2019 - -[discrete#es-connectors-sharepoint-configuration] -===== Configuration - -The following configuration fields are required to set up the connector: - -`authentication`:: -Authentication mode, either *Basic* or *NTLM*. 
- -`username`:: -The username of the account for the SharePoint Server instance. - -`password`:: -The password of the account. - -`host_url`:: -The server host url where the SharePoint Server instance is hosted. Examples: -* `https://192.158.1.38:8080` -* `https://.sharepoint.com` - -`site_collections`:: -Comma-separated list of site collections to fetch from SharePoint Server. Examples: -* `collection1` -* `collection1, collection2` - -`ssl_enabled`:: -Whether SSL verification will be enabled. -Default value is `False`. - -`ssl_ca`:: -Content of SSL certificate needed for SharePoint Server. -Keep this field empty, if `ssl_enabled` is set to `False`. -+ -Example certificate: -+ -[source, txt] ----- ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -... -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- ----- - -`retry_count`:: -The number of retry attempts after a failed request to the SharePoint Server instance. Default value is `3`. - -`use_document_level_security`:: -Toggle to enable <>. -When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. Access control syncs fetch users' access control lists and store them in a separate index. -+ -Once enabled, the following granular permissions toggles will be available: -+ -** *Fetch unique list permissions*: Enable this option to fetch unique *list* permissions. If this setting is disabled a list will inherit permissions from its parent site. -** *Fetch unique list item permissions*: Enable this option to fetch unique *list item* permissions. If this setting is disabled a list item will inherit permissions from its parent site. -+ -[NOTE] -==== -If left empty the default value `true` will be used for these granular permissions toggles. -Note that these settings may increase sync times. -==== - -[discrete#es-connectors-sharepoint-documents-syncs] -===== Documents and syncs - -The connector syncs the following SharePoint object types: - -* Sites and Subsites -* Lists -* List Items and its attachment content -* Document Libraries and its attachment content(include Web Pages) - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted by default. Use the <> to handle larger binary files. -* Permissions are not synced by default. Enable <> to sync permissions. -==== - -[discrete#es-connectors-sharepoint-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-sharepoint-document-level-security] -===== Document level security - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. -Refer to <> on this page for how to enable DLS for this connector. - -[NOTE] -==== -Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. -The example uses SharePoint _Online_ as the data source, but the same steps apply to every connector. -==== - -[discrete#es-connectors-sharepoint-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are not available for this connector in the present version. -Currently filtering is controlled via ingest pipelines. - -[discrete#es-connectors-sharepoint-content-extraction] -===== Content Extraction - -See <>. - -[discrete#es-connectors-sharepoint-known-issues] -===== Known issues - -There are currently no known issues for this connector. 
-Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-sharepoint-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-sharepoint-security] -===== Security - -See <>. - -[discrete#es-connectors-sharepoint-source] -===== Framework and source - -This connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/sharepoint_server.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-sharepoint-connector-client-reference] -==== *Self-managed connector reference* - -.View *self-managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-sharepoint-client-availability-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. -This self-managed connector is compatible with Elastic versions *8.9.0+*. -To use this connector, satisfy all <>. - -[NOTE] -==== -This connector is in *beta* and is subject to change. -Beta features are subject to change and are not covered by the support SLA of generally available (GA) features. -Elastic plans to promote this feature to GA in a future release. -==== - -[discrete#es-connectors-sharepoint-client-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-sharepoint-client-usage] -===== Usage - -To use this connector as a *self-managed connector*, see <>. - -For additional operations, see <>. - -[discrete#es-connectors-sharepoint-client-compatability] -===== Compatibility - -The following SharePoint Server versions are compatible with the Elastic connector framework: - -* SharePoint 2013 -* SharePoint 2016 -* SharePoint 2019 - -[discrete#es-connectors-sharepoint-client-configuration] -===== Configuration - -[TIP] -==== -When using the <> workflow, initially these fields will use the default configuration set in the {connectors-python}/connectors/sources/sharepoint_server.py[connector source code^]. -These are set in the `get_default_configuration` function definition. - -These configurable fields will be rendered with their respective *labels* in the Kibana UI. -Once connected, you'll be able to update these values in Kibana. -==== - -The following configuration fields are required to set up the connector: - -`authentication`:: -Authentication mode, either *Basic* or *NTLM*. - -`username`:: -The username of the account for the SharePoint Server instance. - -`password`:: -The password of the account. - -`host_url`:: -The server host url where the SharePoint Server instance is hosted. Examples: -* `https://192.158.1.38:8080` -* `https://.sharepoint.com` - -`site_collections`:: -Comma-separated list of site collections to fetch from SharePoint Server. Examples: -* `collection1` -* `collection1, collection2` - -`ssl_enabled`:: -Whether SSL verification will be enabled. -Default value is `False`. - -`ssl_ca`:: -Content of SSL certificate needed for the SharePoint Server instance. -Keep this field empty, if `ssl_enabled` is set to `False`. 
-+ -Example certificate: -+ -[source, txt] ----- ------BEGIN CERTIFICATE----- -MIID+jCCAuKgAwIBAgIGAJJMzlxLMA0GCSqGSIb3DQEBCwUAMHoxCzAJBgNVBAYT -... -7RhLQyWn2u00L7/9Omw= ------END CERTIFICATE----- ----- - -`retry_count`:: -The number of retry attempts after failed request to the SharePoint Server instance. Default value is `3`. - -`use_document_level_security`:: -Toggle to enable <>. -When enabled, full syncs will fetch access control lists for each document and store them in the `_allow_access_control` field. Access control syncs fetch users' access control lists and store them in a separate index. -+ -Once enabled, the following granular permissions toggles will be available: -+ -** *Fetch unique list permissions*: Enable this option to fetch unique *list* permissions. If this setting is disabled a list will inherit permissions from its parent site. -** *Fetch unique list item permissions*: Enable this option to fetch unique *list item* permissions. If this setting is disabled a list item will inherit permissions from its parent site. -+ -[NOTE] -==== -If left empty the default value `true` will be used for these granular permissions toggles. -Note that these settings may increase sync times. -==== - -[discrete#es-connectors-sharepoint-client-docker] -===== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-sharepoint-client-documents-syncs] - -===== Documents and syncs - -The connector syncs the following SharePoint object types: - -* Sites and Subsites -* Lists -* List Items and its attachment content -* Document Libraries and its attachment content(include Web Pages) - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted by default. Use the <> to handle larger binary files. -* Permissions are not synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elasticsearch Index. -==== - -[discrete#es-connectors-sharepoint-client-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>, but this feature is currently disabled by default. -Refer to the linked documentation for enabling incremental syncs. - -[discrete#es-connectors-sharepoint-client-document-level-security] -===== Document level security - -Document level security (DLS) enables you to restrict access to documents based on a user’s permissions. -Refer to <> on this page for how to enable DLS for this connector. - -[NOTE] -==== -Refer to <> to learn how to ingest data from a connector with DLS enabled, when building a search application. -The example uses SharePoint Online as the data source, but the same steps apply to every connector. -==== - -[discrete#es-connectors-sharepoint-client-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -Advanced sync rules are not available for this connector in the present version. -Currently filtering is controlled via ingest pipelines. - -[discrete#es-connectors-sharepoint-client-content-extraction] -===== Content Extraction - -See <>. - -[discrete#es-connectors-sharepoint-client-connector-client-operations] -===== Self-managed connector operations - -[discrete#es-connectors-sharepoint-client-testing] -===== End-to-end testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. 
-
-To perform E2E testing for the SharePoint Server connector, run the following command:
-
-[source,shell]
-----
-$ make ftest NAME=sharepoint_server
-----
-
-For faster tests, add the `DATA_SIZE=small` flag:
-
-[source,shell]
-----
-make ftest NAME=sharepoint_server DATA_SIZE=small
-----
-
-[discrete#es-connectors-sharepoint-client-known-issues]
-===== Known issues
-
-There are currently no known issues for this connector.
-Refer to <> for a list of known issues for all connectors.
-
-[discrete#es-connectors-sharepoint-client-troubleshooting]
-===== Troubleshooting
-
-See <>.
-
-[discrete#es-connectors-sharepoint-client-security]
-===== Security
-
-See <>.
-
-[discrete#es-connectors-sharepoint-client-source]
-===== Framework and source
-
-This connector is written in Python using the {connectors-python}[Elastic connector framework^].
-
-View the {connectors-python}/connectors/sources/sharepoint_server.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_).
-
-// Closing the collapsible section
-===============
diff --git a/docs/reference/connector/docs/connectors-slack.asciidoc b/docs/reference/connector/docs/connectors-slack.asciidoc
deleted file mode 100644
index 059394f28d4ec..0000000000000
--- a/docs/reference/connector/docs/connectors-slack.asciidoc
+++ /dev/null
@@ -1,344 +0,0 @@
-[#es-connectors-slack]
-=== Elastic Slack connector reference
-++++
-Slack
-++++
-// Attributes used in this file
-:service-name: Slack
-:service-name-stub: slack
-
-The Slack connector is written in Python using the {connectors-python}[Elastic connector framework^].
-
-View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_).
-
-// //////// //// //// //// //// //// //// ////////
-// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) ///////
-// //////// //// //// //// //// //// //// ////////
-
-[discrete#es-connectors-slack-native-connector-reference]
-==== *Elastic managed connector reference*
-
-.View *Elastic managed connector* reference
-[%collapsible]
-===============
-
-[discrete#es-connectors-slack-availability]
-===== Availability and prerequisites
-
-This managed connector was introduced in Elastic *8.14.0* as a managed service on Elastic Cloud.
-
-To use this connector natively in Elastic Cloud, satisfy all <>.
-
-[NOTE]
-====
-This connector is in **technical preview** and is subject to change.
-The design and code is less mature than official GA features and is being provided as-is with no warranties.
-Technical preview features are not subject to the support SLA of official GA features.
-====
-
-[discrete#es-connectors-slack-create-connector-native]
-===== Create a {service-name} connector
-include::_connectors-create-native.asciidoc[]
-
-[discrete#es-connectors-slack-usage]
-===== Usage
-
-To use this connector in the UI, select the *Slack* tile when creating a new connector under *Search -> Connectors*.
-
-If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs].
-
-For additional operations, see <>.
-
-[NOTE]
-====
-You need to create a Slack application to authenticate with Slack.
-====
-
-[discrete#es-connectors-slack-app]
-====== Create a Slack application
-
-When created, you'll receive a credential that the connector uses for authentication.
-A new Bot user will also be created.
- -[TIP] -==== -The connector will only sync messages from the channels of which the Bot user is a member. -==== - -To create the app, follow these steps: - -1. Go to https://api.slack.com/apps and click "Create New App". -2. Choose "From Scratch". -3. Name the app, and select the workspace you want to sync from. -Depending on the workspace's settings, you may get a warning about requiring admin approval. -That will be handled later. -4. Navigate to "OAuth & Permissions" in the sidebar. -5. Scroll down to the "Scopes" section and add these scopes: -* `channels:history` -* `channels:read` -* `users:read`. -+ -Optionally, you can also add `channels:join` if you want the App Bot to automatically be able to add itself to public channels. -6. Scroll up to "OAuth Tokens for Your Workspace" and install the application. Your workspace may require you to get administrator approval. If so, request approval now and return to the next step once it has been approved. -7. Copy and save the new "Bot User OAuth Token". -This credential will be used when configuring the connector. - -[discrete#es-connectors-slack-configuration] -===== Configuration - -The following settings are required to set up this connector: - -`token`(required) :: -The Bot User OAuth Token generated by creating and installing your Slack App. - -`fetch_last_n_days`(required) :: -The number of days of history to fetch from Slack. -This must be a positive number to fetch a subset of data, going back that many days. -If set to `0`, it will fetch all data since the beginning of the workspace. -The default is 180 days. - -`auto_join_channels`(required) :: -Whether or not the connector should have the App's Bot User automatically invite itself into all public channels. -The connector will only sync messages from the channels of which the Bot user is a member. -By default, the bot will not invite itself to any channels, and must be manually invited to each channel that you wish to sync. -If this setting is enabled, your App must have the `channels.join` scope. - -`sync_users`(required) :: - -Whether or not the connector should index a document for each Slack user. -By default, the connector will create documents only for Channels and Messages. -However, regardless of the value of this setting, the Slack App does need the `users.read` scope and will make requests to enumerate all of the workspace's users. -This allows the messages to be enriched with human-readable usernames, and not rely on unreadable User UIDs. -Therefore, disabling this setting does not result in a speed improvement, but merely results in less overall storage in Elasticsearch. - -[discrete#es-connectors-slack-sync-rules] -===== Sync rules - -_Basic_ sync rules are identical for all connectors and are available by default. - -Advanced sync rules are not available for this connector in the present version. - -For more information read <>. - -[discrete#es-connectors-slack-content-extraction] -===== Content Extraction - -This connector does not currently support processing Slack attachments or other binary files. - -//See <>. - -[discrete#es-connectors-slack-documents-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Channels* -* *Messages* -* *Users* (configurable) - -[NOTE] -==== -* Only public channels and messages from public channels are synced. -* No permissions are synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment. 
-==== - - -[discrete#es-connectors-slack-known-issues] -===== Known issues - -There are currently no known issues for this connector. -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-slack-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-slack-security] -===== Security - -See <>. - - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-slack-connector-client-reference] -==== *Self-managed connector reference* - -.View *self-managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-slack-client-availability] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector* from the *Elastic connector framework*. - -This self-managed connector is compatible with Elastic versions *8.10.0+*. - -To use this connector, satisfy all <>. - -[NOTE] -==== -This connector is in **technical preview** and is subject to change. -The design and code is less mature than official GA features and is being provided as-is with no warranties. -Technical preview features are not subject to the support SLA of official GA features. -==== - -[discrete#es-connectors-slack-client-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-slack-client-usage] -===== Usage - -To use this connector as a **self-managed connector**, use the *Connector* workflow in the Kibana UI. - -For additional operations, see <>. - -[NOTE] -==== -You need to create a Slack application to authenticate with Slack. -==== - -[discrete#es-connectors-slack-client-app] -====== Create a Slack application - -When created you'll receive a credential that the connector uses for authentication. -A new Bot user will also be created. - -[TIP] -==== -The connector will only sync messages from the channels of which the Bot user is a member. -==== - -To create the app, follow these steps: - -1. Go to https://api.slack.com/apps and click "Create New App". -2. Choose "From Scratch". -3. Name the app, and select the workspace you want to sync from. -Depending on the workspace's settings, you may get a warning about requiring admin approval. -That will be handled later. -4. Navigate to "OAuth & Permissions" in the sidebar. -5. Scroll down to the "Scopes" section and add these scopes: -* `channels:history` -* `channels:read` -* `users:read`. -+ -Optionally, you can also add `channels:join` if you want the App Bot to automatically be able to add itself to public channels. -6. Scroll up to "OAuth Tokens for Your Workspace" and install the application. Your workspace may require you to get administrator approval. If so, request approval now and return to the next step once it has been approved. -7. Copy and save the new "Bot User OAuth Token". -This credential will be used when configuring the connector. - -[discrete#es-connectors-slack-client-docker] -===== Deploy with Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-slack-client-configuration] -===== Configuration - -The following settings are required to set up this connector: - -`token`(required) :: -The Bot User OAuth Token generated by creating and installing your Slack App. 
-
-`fetch_last_n_days`(required) ::
-The number of days of history to fetch from Slack.
-This must be a positive number to fetch a subset of data, going back that many days.
-If set to `0`, it will fetch all data since the beginning of the workspace.
-The default is 180 days.
-
-`auto_join_channels`(required) ::
-Whether or not the connector should have the App's Bot User automatically invite itself into all public channels.
-The connector will only sync messages from the channels of which the Bot user is a member.
-By default, the bot will not invite itself to any channels, and must be manually invited to each channel that you wish to sync.
-If this setting is enabled, your App must have the `channels:join` scope.
-
-`sync_users`(required) ::
-Whether or not the connector should index a document for each Slack user.
-By default, the connector will create documents only for Channels and Messages.
-However, regardless of the value of this setting, the Slack App does need the `users:read` scope and will make requests to enumerate all of the workspace's users.
-This allows the messages to be enriched with human-readable usernames, and not rely on unreadable User UIDs.
-Therefore, disabling this setting does not result in a speed improvement, but merely results in less overall storage in Elasticsearch.
-
-[discrete#es-connectors-slack-client-sync-rules]
-===== Sync rules
-
-_Basic_ sync rules are identical for all connectors and are available by default.
-
-Advanced sync rules are not available for this connector in the present version.
-
-For more information read <>.
-
-[discrete#es-connectors-slack-client-content-extraction]
-===== Content Extraction
-
-This connector does not currently support processing Slack attachments or other binary files.
-
-//See <>.
-
-[discrete#es-connectors-slack-client-documents-syncs]
-===== Documents and syncs
-
-The connector syncs the following objects and entities:
-
-* *Channels*
-* *Messages*
-* *Users* (configurable)
-
-[NOTE]
-====
-* Only public channels and messages from public channels are synced.
-* No permissions are synced. **All documents** indexed to an Elastic deployment will be visible to **all users with access** to that Elastic Deployment.
-====
-
-[discrete#es-connectors-slack-client-connector-client-operations]
-===== Self-managed connector operations
-
-[discrete#es-connectors-slack-client-testing]
-===== End-to-end testing
-
-The connector framework enables operators to run functional tests against a real data source.
-Refer to <> for more details.
-
-To perform E2E testing for the Slack connector, run the following command:
-
-[source,shell]
-----
-$ make ftest NAME=slack
-----
-
-For faster tests, add the `DATA_SIZE=small` flag:
-
-[source,shell]
-----
-make ftest NAME=slack DATA_SIZE=small
-----
-
-[discrete#es-connectors-slack-client-known-issues]
-===== Known issues
-
-There are currently no known issues for this connector.
-Refer to <> for a list of known issues for all connectors.
-
-[discrete#es-connectors-slack-client-troubleshooting]
-===== Troubleshooting
-
-See <>.
-
-[discrete#es-connectors-slack-client-security]
-===== Security
-
-See <>.
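-
-Before wiring the Bot User OAuth Token into the configuration above, you can sanity-check it directly against the Slack Web API. The following Python sketch is illustrative only; it is not part of the connector framework and assumes the `requests` library plus a hypothetical `SLACK_BOT_TOKEN` environment variable holding the token:
-
-[source,python]
-----
-import os
-
-import requests
-
-SLACK_API = "https://slack.com/api"
-# Assumption: the Bot User OAuth Token is exported as SLACK_BOT_TOKEN
-headers = {"Authorization": f"Bearer {os.environ['SLACK_BOT_TOKEN']}"}
-
-# auth.test confirms the token is valid and identifies the Bot user
-auth = requests.post(f"{SLACK_API}/auth.test", headers=headers).json()
-if not auth.get("ok"):
-    raise SystemExit(f"Token rejected by Slack: {auth.get('error')}")
-print(f"Authenticated as {auth['user']} in workspace {auth['team']}")
-
-# conversations.list shows the public channels the connector could sync.
-# Remember: the Bot must be a member of a channel (or channels:join must
-# be granted and auto_join_channels enabled) for its messages to be synced.
-channels = requests.get(
-    f"{SLACK_API}/conversations.list",
-    headers=headers,
-    params={"types": "public_channel", "limit": 100},
-).json()
-for channel in channels.get("channels", []):
-    state = "member" if channel.get("is_member") else "not a member"
-    print(f"#{channel['name']}: {state}")
-----
-
-If `auth.test` fails, or every channel reports `not a member`, fix the app's scopes or channel membership before running a sync.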
- - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-teams.asciidoc b/docs/reference/connector/docs/connectors-teams.asciidoc deleted file mode 100644 index cd7d05d884349..0000000000000 --- a/docs/reference/connector/docs/connectors-teams.asciidoc +++ /dev/null @@ -1,359 +0,0 @@ -[#es-connectors-teams] -=== Elastic Microsoft Teams connector reference -++++ -Teams -++++ -// Attributes used in this file -:service-name: Microsoft Teams -:service-name-stub: microsoft_teams - -The Microsoft Teams connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/microsoft_teams.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-teams-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-microsoft-teams-native-availability-and-prerequisites] -===== Availability and prerequisites - -This managed connector was introduced in Elastic *8.14.0* as a managed service on Elastic Cloud. - -To use this connector natively in Elastic Cloud, satisfy all <>. - -[NOTE] -==== -This connector is in **technical preview** and is subject to change. -The design and code is less mature than official GA features and is being provided as-is with no warranties. -Technical preview features are not subject to the support SLA of official GA features. -==== - -[discrete#es-connectors-teams-create-connector-native] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-microsoft-teams-native-usage] -===== Usage - -To use this connector in the UI, select the *Teams* tile when creating a new connector under *Search -> Connectors*. - -If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs]. - -For additional operations, see <>. - -[discrete#es-connectors-microsoft-teams-native-connecting-to-microsoft-teams] -===== Connecting to Microsoft Teams - -To connect to Microsoft Teams you need to https://learn.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal[create an Azure Active Directory application and service principal] that can access resources. Follow these steps: - -1. Go to the https://portal.azure.com[Azure portal] and sign in with your Azure account. -2. Navigate to the *Azure Active Directory* service. -3. Select *App registrations* from the left-hand menu. -4. Click on the *New registration* button to register a new application. -5. Provide a *name* for your app, and _optionally_ select the supported account types (e.g., single tenant, multi-tenant). -6. Click on the *Register* button to create the app registration. -7. After the registration is complete, you will be redirected to the app's overview page. Take note of the *Application (client) ID* value, as you'll need it later. -8. Scroll down to the *API permissions* section and click on the "Add a permission" button. -9. In the "Request API permissions pane, select "Microsoft Graph" as the API. -10. 
Select the following *permissions*: -+ -* `TeamMember.Read.All` (Delegated) -* `Team.ReadBasic.All` (Delegated) -* `TeamsTab.Read.All` (Delegated) -* `Group.Read.All` (Delegated) -* `ChannelMessage.Read.All` (Delegated) -* `Chat.Read` (Delegated) & `Chat.Read.All` (Application) -* `Chat.ReadBasic` (Delegated) & `Chat.ReadBasic.All` (Application) -* `Files.Read.All` (Delegated and Application) -* `Calendars.Read` (Delegated and Application) -+ -11. Click on the *Add permissions* button to add the selected permissions to your app. -12. Click on the *Grant admin consent* button to grant the required permissions to the app. This step requires administrative privileges. *If you are not an admin, you need to request the admin to grant consent via their Azure Portal*. -13. Under the "Certificates & Secrets" tab, go to *Client Secrets*. -Generate a new client secret and keep a note of the string under the `Value` column. - -After completion, use the following configuration parameters to configure the connector. - -[discrete#es-connectors-microsoft-teams-native-configuration] -===== Configuration - -The following configuration fields are required: - -`client_id` (required):: -Unique identifier for your Azure Application, found on the app's overview page. Example: -* `ab123453-12a2-100a-1123-93fd09d67394` - -`secret_value` (required):: -String value that the application uses to prove its identity when requesting a token, available under the `Certificates & Secrets` tab of your Azure application menu. Example: -* `eyav1~12aBadIg6SL-STDfg102eBfCGkbKBq_Ddyu` - -`tenant_id` (required):: -Unique identifier for your Azure Active Directory instance, found on the app's overview page. Example: -* `123a1b23-12a3-45b6-7c8d-fc931cfb448d` - -`username` (required):: -Username for your Azure Application. Example: -* `dummy@3hmr2@onmicrosoft.com` - -`password` (required):: -Password for your Azure Application. Example: -* `changeme` - -[discrete#es-connectors-microsoft-teams-native-content-extraction] -====== Content Extraction - -Refer to <>. - -[discrete#es-connectors-microsoft-teams-native-documents-and-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *USER_CHATS_MESSAGE* -* *USER_CHAT_TABS* -* *USER_CHAT_ATTACHMENT* -* *USER_CHAT_MEETING_RECORDING* -* *USER_MEETING* -* *TEAMS* -* *TEAM_CHANNEL* -* *CHANNEL_TAB* -* *CHANNEL_MESSAGE* -* *CHANNEL_MEETING* -* *CHANNEL_ATTACHMENT* -* *CALENDAR_EVENTS* - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-microsoft-teams-native-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-microsoft-teams-native-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-microsoft-teams-native-advanced-sync-rules] -===== Advanced Sync Rules - -Advanced sync rules are not available for this connector in the present version. - -[discrete#es-connectors-microsoft-teams-native-known-issues] -===== Known issues - -* Messages in one-on-one chats for _Chat with Self_ users are not fetched via Graph APIs. Therefore, these messages won't be indexed into Elasticsearch. - -Refer to <> for a list of known issues for all connectors. 
- -[discrete#es-connectors-microsoft-teams-native-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-microsoft-teams-native-security] -===== Security - -See <>. - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-teams-connector-client-reference] -==== *Self-managed connector reference* - -.View *self-managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-microsoft-teams-availability-and-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. -To use this connector, satisfy all <>. - -[NOTE] -==== -This connector is in *technical preview* and is subject to change. -The design and code is less mature than official GA features and is being provided as-is with no warranties. -Technical preview features are not subject to the support SLA of official GA features. -==== - -[discrete#es-connectors-teams-client-create-connector-client] -===== Create a {service-name} connector -include::_connectors-create-client.asciidoc[] - -[discrete#es-connectors-microsoft-teams-usage] -===== Usage - -To use this connector as a *self-managed connector*, use the *Microsoft Teams* tile from the connectors list *Customized connector* workflow. - -For additional operations, see <>. - -[discrete#es-connectors-microsoft-teams-connecting-to-microsoft-teams] -===== Connecting to Microsoft Teams - -To connect to Microsoft Teams you need to https://learn.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal[create an Azure Active Directory application and service principal] that can access resources. Follow these steps: - -1. Go to the https://portal.azure.com[Azure portal] and sign in with your Azure account. -2. Navigate to the *Azure Active Directory* service. -3. Select *App registrations* from the left-hand menu. -4. Click on the *New registration* button to register a new application. -5. Provide a *name* for your app, and _optionally_ select the supported account types (e.g., single tenant, multi-tenant). -6. Click on the *Register* button to create the app registration. -7. After the registration is complete, you will be redirected to the app's overview page. Take note of the *Application (client) ID* value, as you'll need it later. -8. Scroll down to the *API permissions* section and click on the "Add a permission" button. -9. In the "Request API permissions pane, select "Microsoft Graph" as the API. -10. Select the following *permissions*: -+ -* `TeamMember.Read.All` (Delegated) -* `Team.ReadBasic.All` (Delegated) -* `TeamsTab.Read.All` (Delegated) -* `Group.Read.All` (Delegated) -* `ChannelMessage.Read.All` (Delegated) -* `Chat.Read` (Delegated) & `Chat.Read.All` (Application) -* `Chat.ReadBasic` (Delegated) & `Chat.ReadBasic.All` (Application) -* `Files.Read.All` (Delegated and Application) -* `Calendars.Read` (Delegated and Application) -+ -11. Click on the *Add permissions* button to add the selected permissions to your app. -12. Click on the *Grant admin consent* button to grant the required permissions to the app. This step requires administrative privileges. *If you are not an admin, you need to request the admin to grant consent via their Azure Portal*. -13. Under the "Certificates & Secrets" tab, go to *Client Secrets*. 
-Generate a new client secret and keep a note of the string under the `Value` column. - -After completion, use the following configuration parameters to configure the connector. - -[discrete#es-connectors-microsoft-teams-configuration] -===== Configuration - -The following configuration fields are required: - -`client_id` (required):: -Unique identifier for your Azure Application, found on the app's overview page. Example: -* `ab123453-12a2-100a-1123-93fd09d67394` - -`secret_value` (required):: -String value that the application uses to prove its identity when requesting a token, available under the `Certificates & Secrets` tab of your Azure application menu. Example: -* `eyav1~12aBadIg6SL-STDfg102eBfCGkbKBq_Ddyu` - -`tenant_id` (required):: -Unique identifier for your Azure Active Directory instance, found on the app's overview page. Example: -* `123a1b23-12a3-45b6-7c8d-fc931cfb448d` - -`username` (required):: -Username for your Azure Application. Example: -* `dummy@3hmr2@onmicrosoft.com` - -`password` (required):: -Password for your Azure Application. Example: -* `changeme` - -[discrete#es-connectors-microsoft-teams-client-docker] -====== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-microsoft-teams-content-extraction] -====== Content Extraction - -Refer to <>. - -[discrete#es-connectors-microsoft-teams-documents-and-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *USER_CHATS_MESSAGE* -* *USER_CHAT_TABS* -* *USER_CHAT_ATTACHMENT* -* *USER_CHAT_MEETING_RECORDING* -* *USER_MEETING* -* *TEAMS* -* *TEAM_CHANNEL* -* *CHANNEL_TAB* -* *CHANNEL_MESSAGE* -* *CHANNEL_MEETING* -* *CHANNEL_ATTACHMENT* -* *CALENDAR_EVENTS* - -[NOTE] -==== -* Files bigger than 10 MB won't be extracted. -* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-microsoft-teams-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-microsoft-teams-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-microsoft-teams-advanced-sync-rules] -===== Advanced Sync Rules - -Advanced sync rules are not available for this connector in the present version. - -[discrete#es-connectors-microsoft-teams-end-to-end-testing] -===== End-to-end Testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the Teams connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=microsoft_teams ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=microsoft_teams DATA_SIZE=small ----- - -[discrete#es-connectors-microsoft-teams-known-issues] -===== Known issues - -* Messages in one-on-one chats for _Chat with Self_ users are not fetched via Graph APIs. Therefore, these messages won't be indexed into Elasticsearch. - -Refer to <> for a list of known issues for all connectors. - -[discrete#es-connectors-microsoft-teams-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-microsoft-teams-security] -===== Security - -See <>. 
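-
-If you want to verify the `tenant_id`, `client_id`, and `secret_value` before the first sync, you can request a Microsoft Graph token yourself using the standard client credentials flow. This Python sketch is only an illustration; it assumes the `requests` library and reuses the example values from the configuration section above, with the secret left as a placeholder:
-
-[source,python]
-----
-import requests
-
-# Assumption: placeholders matching the configuration fields described above
-TENANT_ID = "123a1b23-12a3-45b6-7c8d-fc931cfb448d"
-CLIENT_ID = "ab123453-12a2-100a-1123-93fd09d67394"
-CLIENT_SECRET = "<secret_value>"
-
-# Microsoft identity platform token endpoint (client credentials flow).
-# A successful response confirms the tenant ID, client ID, and secret.
-resp = requests.post(
-    f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token",
-    data={
-        "client_id": CLIENT_ID,
-        "client_secret": CLIENT_SECRET,
-        "scope": "https://graph.microsoft.com/.default",
-        "grant_type": "client_credentials",
-    },
-)
-resp.raise_for_status()
-print("Token acquired; expires in", resp.json()["expires_in"], "seconds")
-
-# Whether individual Graph calls succeed still depends on the API
-# permissions that were granted admin consent in the steps above.
-----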
- -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/connectors-troubleshooting.asciidoc b/docs/reference/connector/docs/connectors-troubleshooting.asciidoc deleted file mode 100644 index 798e4e13a253d..0000000000000 --- a/docs/reference/connector/docs/connectors-troubleshooting.asciidoc +++ /dev/null @@ -1,14 +0,0 @@ -[#es-connectors-troubleshooting] -=== Troubleshooting connectors -++++ -Troubleshooting -++++ - -Use the following actions to help diagnose and resolve issues with <> and <>: - -* <>. -* <>. -* <>. -* Edit your index configuration: <>, <>. - -You can also request help or support. diff --git a/docs/reference/connector/docs/connectors-usage.asciidoc b/docs/reference/connector/docs/connectors-usage.asciidoc deleted file mode 100644 index e48c503971e42..0000000000000 --- a/docs/reference/connector/docs/connectors-usage.asciidoc +++ /dev/null @@ -1,225 +0,0 @@ -[#es-connectors-usage] -== Connectors UI in {kib} - -This document describes operations available to <> and <>, using the UI. - -In the Kibana UI, navigate to *Search > Content > Connectors* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. Here, you can view a summary of all your connectors and sync jobs, and to create new connectors. - -[TIP] -==== -In 8.12 we introduced a set of {ref}/connector-apis.html[Connector APIs] to create and manage Elastic connectors and sync jobs, along with a https://github.com/elastic/connectors/blob/main/docs/CLI.md[CLI tool]. -Use these tools if you'd like to work with connectors and sync jobs programmatically, without using the UI. -==== - -[discrete#es-connectors-usage-index-create] -=== Create and configure connectors - -You connector writes data to an {es} index. - -To create <> or self-managed <>, use the buttons under *Search > Content > Connectors*. -Once you've chosen the data source type you'd like to sync, you'll be prompted to create an {es} index. - -[discrete#es-connectors-usage-indices] -=== Manage connector indices - -View and manage all Elasticsearch indices managed by connectors. - -In the {kib} UI, navigate to *Search > Content > Connectors* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. Here, you can view a list of connector indices and their attributes, including connector type health and ingestion status. - -Within this interface, you can choose to view the details for each existing index or delete an index. -Or, you can <>. - -These operations require access to Kibana and additional index privileges. - -[discrete#es-connectors-usage-index-create-configure-existing-index] -=== Customize connector index mappings and settings - -{es} stores your data as documents in an index. Each index is made up of a set of fields and each field has a type (such as `keyword`, `boolean`, or `date`). - -*Mapping* is the process of defining how a document, and the fields it contains, are stored and indexed. -Connectors use {ref}/dynamic-field-mapping.html[dynamic mapping] to automatically create mappings based on the data fetched from the source. - -Index *settings* are configurations that can be adjusted on a per-index basis. They control things like the index's performance, the resources it uses, and how it should handle operations. - -When you create an index with a connector, the index is created with _default_ search-optimized field template mappings and index settings. 
Mappings for specific fields are then dynamically created based on the data fetched from the source. - -You can inspect your index mappings in the following ways: - -* *In the {kib} UI*: Navigate to *Search > Content > Indices > _YOUR-INDEX_ > Index Mappings* -* *By API*: Use the {ref}/indices-get-mapping.html[Get mapping API] - -You can manually *edit* the mappings and settings via the {es} APIs: - -* Use the {ref}/indices-put-mapping.html[Put mapping API] to update index mappings. -* Use the {ref}/indices-update-settings.html[Update index settings API] to update index settings. - -It's important to note that these updates are more complex when the index already contains data. - -Refer to the following sections for more information. - -[discrete#es-connectors-usage-index-create-configure-existing-index-no-data] -==== Customize mappings and settings before syncing data - -Updating mappings and settings is simpler when your index has no data. -If you create and attach a _new_ index while setting up a connector, you can customize the mappings and settings before syncing data, using the APIs mentioned earlier. - -[discrete#es-connectors-usage-index-create-configure-existing-index-have-data] -==== Customize mappings and settings after syncing data - -Once data has been added to {es} using dynamic mappings, you can't directly update existing field mappings. -If you've already synced data into an index and want to change the mappings, you'll need to {ref}/docs-reindex.html[reindex your data]. - -The workflow for these updates is as follows: - -. {ref}/indices-create-index.html[Create] a new index with the desired mappings and settings. -. {ref}/docs-reindex.html[Reindex] your data from the old index into this new index. -. Delete the old index. -. (Optional) Use an {ref}/aliases.html[alias], if you want to retain the old index name. -. Attach your connector to the new index or alias. - -[discrete#es-connectors-usage-syncs-recurring] -=== Manage recurring syncs - -After creating an index to be managed by a connector, you can configure automatic, recurring syncs. - -In the {kib} UI, navigate to *Search > Content > Connectors* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. - -Choose the index to configure, and then choose the *Scheduling* tab. - -Within this interface, you can enable or disable scheduled: - -. Full content syncs -. Incremental content syncs (if supported) -. Access control syncs (if supported) - -When enabled, you can additionally manage the sync schedule. - -This operation requires access to Kibana and the `write` {ref}/security-privileges.html[indices privilege^] for the `.elastic-connectors` index. - -Alternatively, you can <>. - -After you enable recurring syncs or sync once, the first sync will begin. -(There may be a short delay before the connector service begins the first sync.) -You may want to <> to see the status or errors, or <>. - -[discrete#es-connectors-usage-syncs-manual] -=== Sync once - -After creating the index to be managed by a connector, you can request a single sync at any time. - -In the {kib} UI, navigate to *Search > Content > Elasticsearch indices* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. - -Then choose the index to sync. - -Regardless of which tab is active, the *Sync* button is always visible in the top right. -Choose this button to reveal sync options: - -. Full content -. 
Incremental content (if supported) -. Access control (if supported) - -Choose one of the options to request a sync. -(There may be a short delay before the connector service begins the sync.) - -This operation requires access to Kibana and the `write` {ref}/security-privileges.html[indices privilege^] for the `.elastic-connectors` index. - -[discrete#es-connectors-usage-syncs-cancel] -=== Cancel sync - -After a sync has started, you can cancel the sync before it completes. - -In the {kib} UI, navigate to *Search > Content > Elasticsearch indices* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. - -Then choose the index with the running sync. - -Regardless of which tab is active, the *Sync* button is always visible in the top right. -Choose this button to reveal sync options, and choose *Cancel Syncs* to cancel active syncs. -This will cancel the running job, and marks all _pending_ and _suspended_ jobs as canceled as well. -(There may be a short delay before the connector service cancels the syncs.) - -This operation requires access to Kibana and the `write` {ref}/security-privileges.html[indices privilege^] for the `.elastic-connectors` and `.elastic-connectors-sync-jobs` index. - -[discrete#es-connectors-usage-index-view] -=== View status - -View the index details to see a variety of information that communicate the status of the index and connector. - -In the {kib} UI, navigate to *Search > Content > Elasticsearch indices* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. - -Then choose the index to view. - -The *Overview* tab presents a variety of information, including: - -* General information about the connector index, for example: name, description, ingestion type, connector type, and language analyzer. -* Any errors affecting the connector or sync process. -* The current ingestion status (see below for possible values). -* The current document count. - -Possible values of ingestion status: - -* Incomplete - A connector that is not configured yet. -* Configured - A connector that is configured. -* Connected - A connector that can successfully connect to a data source. -* Error - A connector that failed to connect to the data source. -* Connector failure - A connector that has not seen any update for more than 30 minutes. -* Sync failure - A connector that failed in the last sync job. - -This tab also displays the recent sync history, including sync status (see below for possible values). - -Possible values of sync status: - -* Sync pending - The initial job status, the job is pending to be picked up. -* Sync in progress - The job is running. -* Canceling sync - Cancelation of the job has been requested. -* Sync canceled - The job was canceled -* Sync suspended - The job was suspended due to service shutdown, and it can be resumed when the service restarts. -* Sync complete - The job completed successfully. -* Sync failure - The job failed. - -For each sync, choose the `view` button to display the job details, including: - -* The job ID -* Document stats, including: number of documents added/deleted, total number of documents, and volume of documented added -* Event logs -* Sync rules that were active when the sync was requested -* Pipelines that were active when the sync was requested - -This operation requires access to Kibana and the `read` {ref}/security-privileges.html[indices privilege^] for the `.elastic-connectors` index. 
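-
-The same status information is also available programmatically through the {ref}/connector-apis.html[Connector APIs]. The Python sketch below is illustrative only: it uses the `requests` library with placeholder endpoint, credentials, and connector ID, and reads response fields defensively because exact field names can vary by version:
-
-[source,python]
-----
-import requests
-
-# Assumption: placeholder deployment URL, credentials, and connector ID
-ES_URL = "https://<your-elasticsearch-endpoint>"
-AUTH = ("elastic", "<password>")
-CONNECTOR_ID = "<connector_id>"
-
-# Fetch the connector record: GET _connector/<connector_id>
-connector = requests.get(f"{ES_URL}/_connector/{CONNECTOR_ID}", auth=AUTH).json()
-print("index:", connector.get("index_name"))
-print("ingestion status:", connector.get("status"))
-print("last sync status:", connector.get("last_sync_status"))
-
-# List recent sync jobs for this connector: GET _connector/_sync_job
-jobs = requests.get(
-    f"{ES_URL}/_connector/_sync_job",
-    params={"connector_id": CONNECTOR_ID, "size": 5},
-    auth=AUTH,
-).json()
-for job in jobs.get("results", []):
-    print(
-        job.get("job_type"),
-        job.get("status"),
-        "indexed:", job.get("indexed_document_count"),
-        "deleted:", job.get("deleted_document_count"),
-    )
-----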
- -[discrete#es-connectors-usage-documents] -=== View documents - -View the documents the connector has synced from the data. -Additionally view the index mappings to determine the current document schema. - -In the {kib} UI, navigate to *Search > Content > Elasticsearch indices* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. - -Then choose the index to view. - -Choose the *Documents* tab to view the synced documents. -Choose the *Index Mappings* tab to view the index mappings that were created by the connector. - -When setting up a new connector, ensure you are getting the documents and fields you were expecting from the data source. -If not, see <> for help. - -These operations require access to Kibana and the `read` and `manage` {ref}/security-privileges.html[indices privileges^] for the index containing the documents. - -See <> for security details. - -[discrete#es-connectors-usage-sync-rules] -=== Manage sync rules - -Use <> to limit which documents are fetched from the data source, or limit which fetched documents are stored in Elastic. - -In the {kib} UI, navigate to *Search > Content > Elasticsearch indices* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. - -Then choose the index to manage and choose the *Sync rules* tab. - -[discrete#es-connectors-usage-pipelines] -=== Manage ingest pipelines - -Use {ref}/ingest-pipeline-search.html[ingest pipelines] to transform fetched data before it is stored in Elastic. - -In the {kib} UI, navigate to *Search > Content > Elasticsearch indices* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. - -Then choose the index to manage and choose the *Pipelines* tab. diff --git a/docs/reference/connector/docs/connectors-use-cases.asciidoc b/docs/reference/connector/docs/connectors-use-cases.asciidoc deleted file mode 100644 index 0fd6e81a8e483..0000000000000 --- a/docs/reference/connector/docs/connectors-use-cases.asciidoc +++ /dev/null @@ -1,11 +0,0 @@ -[#es-connectors-use-cases] -== Connectors use cases -++++ -Use cases -++++ - -Learn how to use connectors for your use case. - -* <> - -include::connectors-architecture.asciidoc[] \ No newline at end of file diff --git a/docs/reference/connector/docs/connectors-zoom.asciidoc b/docs/reference/connector/docs/connectors-zoom.asciidoc deleted file mode 100644 index d945a0aec3da1..0000000000000 --- a/docs/reference/connector/docs/connectors-zoom.asciidoc +++ /dev/null @@ -1,364 +0,0 @@ -[#es-connectors-zoom] -=== Elastic Zoom connector reference -++++ -Zoom -++++ -// Attributes used in this file -:service-name: Zoom -:service-name-stub: zoom - -The Zoom connector is written in Python using the {connectors-python}[Elastic connector framework^]. - -View the {connectors-python}/connectors/sources/{service-name-stub}.py[*source code* for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). 
- - -// //////// //// //// //// //// //// //// //////// -// //////// NATIVE CONNECTOR REFERENCE (MANAGED SERVICE) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-zoom-native-connector-reference] -==== *Elastic managed connector reference* - -.View *Elastic managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-zoom-connector-availability-and-prerequisites] -===== Availability and prerequisites - -This managed connector was introduced in Elastic *8.14.0* as a managed service on Elastic Cloud. - -To use this connector natively in Elastic Cloud, satisfy all <>. - -[NOTE] -==== -This connector is in **technical preview** and is subject to change. -The design and code is less mature than official GA features and is being provided as-is with no warranties. -Technical preview features are not subject to the support SLA of official GA features. -==== - -[discrete#es-connectors-zoom-create-native-connector] -===== Create a {service-name} connector -include::_connectors-create-native.asciidoc[] - -[discrete#es-connectors-zoom-connector-usage] -===== Usage - -To use this connector in the UI, select the *Zoom* tile when creating a new connector under *Search -> Connectors*. - -If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs]. - -For additional operations, see <>. - -[discrete#es-connectors-zoom-connector-connecting-to-zoom] -===== Connecting to Zoom - -To connect to Zoom you need to https://developers.zoom.us/docs/internal-apps/s2s-oauth/[create an Server-to-Server OAuth application] that can access resources. Follow these steps: - -1. Go to the https://marketplace.zoom.us/[Zoom App Marketplace] and sign in with your Zoom account. -2. Navigate to the "Develop" service. -3. Select "Build App" from the dropdown menu. -4. Click on the "Server-to-Server OAuth" button to register a new application. -5. Provide a name for your app. -6. Click on the "Create" button to create the app registration. -7. After the registration is complete, you will be redirected to the app's overview page. Take note of the "App Credentials" value, as you'll need it later. -8. Navigate to the "Scopes" section and click on the "Add Scopes" button. -9. The following granular scopes need to be added to the app. -+ -[source,bash] ----- -user:read:list_users:admin -meeting:read:list_meetings:admin -meeting:read:list_past_participants:admin -cloud_recording:read:list_user_recordings:admin -team_chat:read:list_user_channels:admin -team_chat:read:list_user_messages:admin ----- -[NOTE] -==== -The connector requires a minimum scope of `user:read:list_users:admin` to ingest data into Elasticsearch. -==== -+ -10. Click on the "Done" button to add the selected scopes to your app. -11. Navigate to the "Activation" section and input the necessary information to activate the app. - -After completion, use the following configuration parameters to configure the connector. - -[discrete#es-connectors-zoom-connector-configuration] -===== Configuration - -The following configuration fields are required: - -`Zoom application Account ID`:: (required) -"Account ID" is a unique identifier associated with a specific Zoom account within the Zoom platform, found on the app's overview page. Example: - -* `KVx-aQssTOutOAGrDfgMaA` - -`Zoom application Client ID`:: (required) -"Client ID" refers to a unique identifier associated with an application that integrates with the Zoom platform, found on the app's overview page. 
Example: - -* `49Z69_rnRiaF4JYyfHusw` - -`Zoom application Client Secret`:: (required) -The "Client Secret" refers to a confidential piece of information generated when developers register an application on the Zoom Developer Portal for integration with the Zoom platform, found on the app's overview page. Example: - -* `eieiUJRsiH543P5NbYadavczjkqgdRTw` - -`Recording Age Limit (Months)`:: (required) -How far back in time to request recordings from Zoom. Recordings older than this will not be indexed. This configuration parameter allows you to define a time limit, measured in months, for which recordings will be indexed. - -`Fetch past meeting details`:: -Retrieve more information about previous meetings, including their details and participants. Default value is `False`. Enable this option to fetch past meeting details. This setting can increase sync time. - -[discrete#es-connectors-zoom-connector-content-extraction] -====== Content Extraction - -Refer to <>. - -[discrete#es-connectors-zoom-connector-documents-and-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Users* -* *Live Meetings* -* *Upcoming Meetings* -* *Past Meetings* -* *Recordings* -* *Channels* -* *Chat Messages* -* *Chat Files* - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted. (Self-managed connectors can use the <> to handle larger binary files.) -* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-zoom-connector-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-zoom-connector-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-zoom-connector-advanced-sync-rules] -===== Advanced Sync Rules - -Advanced sync rules are not available for this connector in the present version. - -[discrete#es-connectors-zoom-connector-known-issues] -===== Known issues - -* *Meetings*: Users can only index meetings that are less than a month old. -* *Chat Messages & Files*:Users can only index chats and files that are less than 6 months old. - -Refer to <> for a list of known issues for _all_ connectors. - -[discrete#es-connectors-zoom-connector-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-zoom-connector-security] -===== Security - -See <>. - - -// Closing the collapsible section -=============== - - -// //////// //// //// //// //// //// //// //////// -// //////// CONNECTOR CLIENT REFERENCE (SELF-MANAGED) /////// -// //////// //// //// //// //// //// //// //////// - -[discrete#es-connectors-zoom-connector-client-reference] -==== *Self-managed connector reference* - -.View *self-managed connector* reference -[%collapsible] -=============== - -[discrete#es-connectors-zoom-client-connector-availability-and-prerequisites] -===== Availability and prerequisites - -This connector is available as a self-managed *self-managed connector*. To use this connector, satisfy all <>. - -[NOTE] -==== -This connector is in *technical preview* and is subject to change. -The design and code is less mature than official GA features and is being provided as-is with no warranties. -Technical preview features are not subject to the support SLA of official GA features. 
-====
-
-[discrete#es-connectors-zoom-client-create-connector-client]
-===== Create a {service-name} connector
-include::_connectors-create-client.asciidoc[]
-
-[discrete#es-connectors-zoom-client-connector-usage]
-===== Usage
-
-To use this connector in the UI, select the *Zoom* tile when creating a new connector under *Search -> Connectors*.
-
-If you're already familiar with how connectors work, you can also use the {ref}/connector-apis.html[Connector APIs].
-
-For additional operations, see <>.
-
-[discrete#es-connectors-zoom-client-connector-connecting-to-zoom]
-===== Connecting to Zoom
-
-To connect to Zoom, you need to https://developers.zoom.us/docs/internal-apps/s2s-oauth/[create a Server-to-Server OAuth application] that can access resources. Follow these steps:
-
-1. Go to the https://marketplace.zoom.us/[Zoom App Marketplace] and sign in with your Zoom account.
-2. Navigate to the "Develop" service.
-3. Select "Build App" from the dropdown menu.
-4. Click on the "Server-to-Server OAuth" button to register a new application.
-5. Provide a name for your app.
-6. Click on the "Create" button to create the app registration.
-7. After the registration is complete, you will be redirected to the app's overview page. Take note of the "App Credentials" value, as you'll need it later.
-8. Navigate to the "Scopes" section and click on the "Add Scopes" button.
-9. The following granular scopes need to be added to the app:
-+
-[source,bash]
-----
-user:read:list_users:admin
-meeting:read:list_meetings:admin
-meeting:read:list_past_participants:admin
-cloud_recording:read:list_user_recordings:admin
-team_chat:read:list_user_channels:admin
-team_chat:read:list_user_messages:admin
-----
-[NOTE]
-====
-The connector requires a minimum scope of `user:read:list_users:admin` to ingest data into Elasticsearch.
-====
-+
-10. Click on the "Done" button to add the selected scopes to your app.
-11. Navigate to the "Activation" section and input the necessary information to activate the app.
-
-After completion, use the following configuration parameters to configure the connector.
-
-[discrete#es-connectors-zoom-client-connector-configuration]
-===== Configuration
-
-The following configuration fields are required:
-
-`Zoom application Account ID`:: (required)
-"Account ID" is a unique identifier associated with a specific Zoom account within the Zoom platform, found on the app's overview page. Example:
-
-* `KVx-aQssTOutOAGrDfgMaA`
-
-`Zoom application Client ID`:: (required)
-"Client ID" refers to a unique identifier associated with an application that integrates with the Zoom platform, found on the app's overview page. Example:
-
-* `49Z69_rnRiaF4JYyfHusw`
-
-`Zoom application Client Secret`:: (required)
-The "Client Secret" refers to a confidential piece of information generated when developers register an application on the Zoom Developer Portal for integration with the Zoom platform, found on the app's overview page. Example:
-
-* `eieiUJRsiH543P5NbYadavczjkqgdRTw`
-
-`Recording Age Limit (Months)`:: (required)
-How far back in time to request recordings from Zoom. Recordings older than this will not be indexed. This configuration parameter allows you to define a time limit, measured in months, for which recordings will be indexed.
-
-`Fetch past meeting details`::
-Retrieve more information about previous meetings, including their details and participants. Default value is `False`. Enable this option to fetch past meeting details. This setting can increase sync time.
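-
-To confirm that the Account ID, Client ID, and Client Secret are valid before the first sync, you can exercise Zoom's Server-to-Server OAuth flow directly. The following Python sketch is illustrative only; it assumes the `requests` library and reuses the example values above, with the secret left as a placeholder:
-
-[source,python]
-----
-import requests
-
-# Assumption: placeholders matching the configuration fields above
-ACCOUNT_ID = "KVx-aQssTOutOAGrDfgMaA"
-CLIENT_ID = "49Z69_rnRiaF4JYyfHusw"
-CLIENT_SECRET = "<client_secret>"
-
-# Server-to-Server OAuth: exchange the app credentials for an access token
-token_resp = requests.post(
-    "https://zoom.us/oauth/token",
-    params={"grant_type": "account_credentials", "account_id": ACCOUNT_ID},
-    auth=(CLIENT_ID, CLIENT_SECRET),
-)
-token_resp.raise_for_status()
-access_token = token_resp.json()["access_token"]
-
-# This request should succeed with the user:read:list_users:admin scope
-users = requests.get(
-    "https://api.zoom.us/v2/users",
-    headers={"Authorization": f"Bearer {access_token}"},
-    params={"page_size": 5},
-)
-users.raise_for_status()
-for user in users.json().get("users", []):
-    print(user.get("email"), user.get("type"))
-----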
- -[discrete#es-connectors-zoom-client-client-docker] -====== Deployment using Docker - -include::_connectors-docker-instructions.asciidoc[] - -[discrete#es-connectors-zoom-client-connector-content-extraction] -====== Content Extraction - -Refer to <>. - -[discrete#es-connectors-zoom-client-connector-documents-and-syncs] -===== Documents and syncs - -The connector syncs the following objects and entities: - -* *Users* -* *Live Meetings* -* *Upcoming Meetings* -* *Past Meetings* -* *Recordings* -* *Channels* -* *Chat Messages* -* *Chat Files* - -[NOTE] -==== -* Content from files bigger than 10 MB won't be extracted by default. You can use the <> to handle larger binary files. -* Permissions are not synced. *All documents* indexed to an Elastic deployment will be visible to *all users with access* to that Elastic Deployment. -==== - -[discrete#es-connectors-zoom-client-connector-sync-types] -====== Sync types - -<> are supported by default for all connectors. - -This connector also supports <>. - -[discrete#es-connectors-zoom-client-connector-sync-rules] -===== Sync rules - -<> are identical for all connectors and are available by default. - -[discrete#es-connectors-zoom-client-connector-advanced-sync-rules] -===== Advanced Sync Rules - -Advanced sync rules are not available for this connector in the present version. - -[discrete#es-connectors-zoom-client-connector-connector-client-operations] -===== Connector Client operations - -[discrete#es-connectors-zoom-client-connector-end-to-end-testing] -====== End-to-end Testing - -The connector framework enables operators to run functional tests against a real data source. -Refer to <> for more details. - -To perform E2E testing for the Zoom connector, run the following command: - -[source,shell] ----- -$ make ftest NAME=zoom ----- - -For faster tests, add the `DATA_SIZE=small` flag: - -[source,shell] ----- -make ftest NAME=zoom DATA_SIZE=small ----- - -[discrete#es-connectors-zoom-client-connector-known-issues] -===== Known issues - -* *Meetings*: Users can only index meetings that are less than a month old. -* *Chat Messages & Files*:Users can only index chats and files that are less than 6 months old. - -Refer to <> for a list of known issues for _all_ connectors. - -[discrete#es-connectors-zoom-client-connector-troubleshooting] -===== Troubleshooting - -See <>. - -[discrete#es-connectors-zoom-client-connector-security] -===== Security - -See <>. - - -// Closing the collapsible section -=============== diff --git a/docs/reference/connector/docs/dls-e2e-guide.asciidoc b/docs/reference/connector/docs/dls-e2e-guide.asciidoc deleted file mode 100644 index 3670ed0730bc7..0000000000000 --- a/docs/reference/connector/docs/dls-e2e-guide.asciidoc +++ /dev/null @@ -1,439 +0,0 @@ -[#es-dls-e2e-guide] -=== Leverage document-level security from connectors in Search Applications -++++ -DLS in Search Applications -++++ - -This guide explains how to ensure document-level security (DLS) for documents ingested by <>, when building a search application. 
- -In this example we will: - -* Set up the SharePoint Online connector to ingest data from SharePoint Online -* Set up a *Search Application* using the Elasticsearch index created by the SharePoint Online connector -* Create Elasticsearch *API keys* with DLS and workflow restrictions to query your Search Application -* Build a search experience where authenticated users can search over the data ingested by connectors - -[discrete#es-dls-e2e-guide-connector-setup] -==== Set up connector to sync data with access control - -You can run SharePoint Online connector in Elastic Cloud (native) or on a self-managed deployment (self-managed connector). -Refer to <> to learn how to set up the SharePoint Online connector and enable DLS. - - -To run the self-managed connector, you'll need to run the *connectors service* in addition to your Elastic deployment. -Refer to <> for details on how to set up a self-managed connector and run the connectors service. - -[TIP] -==== -This guide assumes you already have an Elastic deployment, that satisfies the <> for running the connectors service. -If you don't have an Elastic deployment, sign up for a https://cloud.elastic.co/registration[free Elastic Cloud trial^]. -==== - -[NOTE] -==== -We use the SharePoint Online connector in this concrete example. -Refer to <> for a list of connectors that support DLS. -==== - -[discrete#es-dls-e2e-guide-sharepoint-data-overview] -==== Elasticsearch indices overview - -When the SharePoint Online connector is set up and you've started syncing content, the connector will create two separate Elasticsearch indices: - -* A *content* index that holds the searchable data in SharePoint Online. -We'll use this index to create our search application. -* An *access control* index that includes access control data for each user that has access to SharePoint Online. -It will be named `.search-acl-filter-`, where `` is the index name you chose. -For example, an index named `search-sharepoint` would have the ACL filter index `.search-acl-filter-search-sharepoint`. -We'll use this index to create Elasticsearch API keys that control access to the content index. - -[discrete#es-dls-e2e-guide-search-application-create] -==== Create a Search Application - -To build our search experience for our SharePoint Online data, we need to create a Search Application. - -Follow these steps to create a Search Application in the Kibana UI: - -. Navigate to *Search > Search Applications* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. -. Select *Create*. -. *Name* the Search Application. -. Select the *index* used by the SharePoint Online connector. -. Select *Create*. - -Alternatively, you can use the {ref}/put-search-application.html[Put Search Application] API. - -[discrete#es-dls-e2e-guide-elasticsearch-api-keys-setup] -==== Create Elasticsearch API keys - -Next we need to create Elasticsearch API keys to restrict queries to the search application. -These restrictions will ensure that users can only query documents they have access to. -To create this API key, we will leverage information in the access control index created by the connector. 
- -The access control index will contain documents similar to this example: - -[source,js] ----- -{ - "_index": ".search-acl-filter-search-sharepoint", - "_id": "john@example.co", - "_version": 1, - "_seq_no": 0, - "_primary_term": 1, - "found": true, - "_source": { - "identity": { - "email": "john@example.co", - "access_control": [ - "john@example.co", - "Engineering Members" - ] - }, - "query": { - "template": { - "params": { - "access_control": [ - "john@example.co", - "Engineering Members" - ] - }, - "source": """ - { - "bool": { - "should": [ - { - "bool": { - "must_not": { - "exists": { - "field": "_allow_access_control" - } - } - } - }, - { - "terms": { - "_allow_access_control.enum": {{#toJson}}access_control{{/toJson}} - } - } - ] - } - } - """ - } - } - } -} ----- -// NOTCONSOLE - -This document contains the Elasticsearch query that describes which documents the user `john@example.com` has access to. -The access control information is stored in the `access_control` field. -In this case the user has access only to documents that contain `"john@example.co"` or `"Engineering Members"` in the `_allow_access_control` field. - -The `query` field contains the DLS query we will use to create an Elasticsearch API key. -That key will ensure queries are restricted to the documents `john@example.com` has access to. - -To create the API key, we will use the {ref}/security-api-create-api-key.html[Create API Key] API. -The API call will look like this: - -[source,console] ----- -POST /_security/api_key -{ - "name": "john-api-key", - "expiration": "1d", - "role_descriptors": { - "sharepoint-online-role": { - "index": [ - { - "names": [ - "sharepoint-search-application" - ], - "privileges": [ - "read" - ], - "query": { - "template": { - "params": { - "access_control": [ - "john@example.co", - "Engineering Members" - ] - }, - "source": """ - { - "bool": { - "should": [ - { - "bool": { - "must_not": { - "exists": { - "field": "_allow_access_control" - } - } - } - }, - { - "terms": { - "_allow_access_control.enum": {{#toJson}}access_control{{/toJson}} - } - } - ] - } - } - """ - } - } - } - ], - "restriction": { - "workflows": [ - "search_application_query" - ] - } - } - } -} ----- -// TEST[skip:TODO] - -The response will look like this: - -[source,js] ----- -{ - "id": "0rCD3i-MjKsw4g9BpRIBa", - "name": "john-api-key", - "expiration": 1687881715555, - "api_key": "zTxre9L6TcmRIgd2NgLCRg", - "encoded": "Qk05dy1JZ0JhRDNyNGpLQ3MwUmk6elRzdGU5QjZUY21SSWdkMldnQ1RMZw==" -} ----- -// NOTCONSOLE - -The `api_key` field contains the API key that can be used to query the Search Application with the appropriate DLS restrictions. - -[discrete#es-dls-e2e-guide-elasticsearch-querying-multiple-indices] -===== Querying multiple indices - -This section describes how to generate an API key to query a search application that contains multiple indices with documents ingested by connectors with DLS. - -A user might have multiple identities that define which documents they are allowed to read. -In this case we want to create a single Elasticsearch API key that can be used to query only the documents this user has access to. 
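-
-Before combining identities, it can help to confirm which access control indices exist for your connectors.
-This is a quick sketch, assuming the standard `.search-acl-filter-*` naming used throughout this guide; `expand_wildcards=all` makes sure hidden indices are included:
-
-[source,console]
-----
-GET _cat/indices/.search-acl-filter-*?v&expand_wildcards=all
-----
-// TEST[skip:TODO]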
- -Let's assume we want to create an API key that combines the following user identities: - -[source,js] ----- -GET .search-acl-filter-source1 -{ - "_id": "example.user@example.com", - "identity": { - "username": "example username", - "email": "example.user@example.com" - }, - "query": { - "template": { - "params": { - "access_control": [ - "example.user@example.com", - "source1-user-group"] - } - }, - "source": "..." - } -} ----- -// NOTCONSOLE - -[source,js] ----- -GET .search-acl-filter-source2 -{ - "_id": "example.user@example.com", - "identity": { - "username": "example username", - "email": "example.user@example.com" - }, - "query": { - "template": { - "params": { - "access_control": [ - "example.user@example.com", - "source2-user-group"] - } - }, - "source": "..." - } -} ----- -// NOTCONSOLE - -`.search-acl-filter-source1` and `.search-acl-filter-source2` define the access control identities for `source1` and `source2`. - -The following script exemplifies how to generate the Elasticsearch API key that combines multiple user identities: - -[source,js] ----- -require("dotenv").config(); -const axios = require("axios"); - -// Elasticsearch URL and creds retrieved from environment variables -const ELASTICSEARCH_URL = process.env.ELASTICSEARCH_URL; -const ELASTICSEARCH_USER = process.env.ELASTICSEARCH_USER; -const ELASTICSEARCH_PASSWORD = process.env.ELASTICSEARCH_PASSWORD; - -const config = { - auth: { - username: ELASTICSEARCH_USER, - password: ELASTICSEARCH_PASSWORD, - }, - headers: { - "Content-Type": "application/json", - }, -}; - -async function createApiKey({ - searchApplication, - userId, - indices = "", - metadata, - expiration = "1d" -}) { - try { - const indices = indices.split(","); - - let combinedQuery = { bool: { should: [] } }; - - for (const index of indices) { - const aclsIndex = `.search-acl-filter-${index}`; - const response = await axios.get( - `${ELASTICSEARCH_URL}/${aclsIndex}/_doc/${userId}`, - config - ); - combinedQuery.bool.should.push({ - bool: { - must: [ - { - term: { - "_index": index, - }, - }, - response.data._source.query.source, - ], - }, - }); - } - - if (!metadata || Object.keys(metadata).length === 0) { - metadata = { created_by: "create-api-key" }; - } - - const apiKeyBody = { - name: userId, - expiration, - role_descriptors: { - [`${searchApplication}-role`]: { - index: [ - { - names: [searchApplication], - privileges: ["read"], - query: combinedQuery, - }, - ], - restriction: { - workflows: ["search_application_query"], - }, - }, - }, - metadata, - }; - - const apiKeyResponse = await axios.post( - `${ELASTICSEARCH_URL}/_security/api_key`, - apiKeyBody, - config - ); - - console.log(apiKeyResponse.data); - return apiKeyResponse.data.encoded; - } catch (error) { - console.log(error) - } -} - -// example usage: -createApiKey({ - searchApplication: "my-search-app", - userId: "example.user@example.com", - indices: "source1,source2", - expiration: "1d", - metadata: { - application: "my-search-app", - namespace: "dev", - foo: "bar", - }, -}).then((encodedKey) => console.log(encodedKey)); - ----- -// NOTCONSOLE - -NOTE: The example combines multiple identities into a single role descriptor. This is because an Elasticsearch API key can use role restrictions only if it has a *single role descriptor*. - -[discrete#es-dls-e2e-guide-elasticsearch-api-keys-frontend-implementation] -==== Implementation in your frontend application - -If you're building a frontend application, use the `encoded` field to pass the API key to the frontend. 
-Your app can then use the API key to query the search application. -The workflow will look something like this: - -1. User signs in to your application. -2. Your application generates an Elasticsearch API key using the {ref}/security-api-create-api-key.html[Create API Key] API. -3. The `encoded` field is returned to the frontend application. -4. When the user searches for documents, the frontend application passes the `encoded` field to your search application's {ref}/search-application-search.html[`_search` endpoint]. -For example, you might use the https://github.com/elastic/search-application-client[Search Application client^] to make the actual queries using the API key: -+ -[source,js] ----- -const client = SearchApplicationClient(applicationName, endpoint, apiKey, params); ----- -// NOTCONSOLE - -Here's what this workflow looks like in a sequence diagram: - -[.screenshot] -image::images/dls-api-key-workflow.png[DLS API key and search application client workflow] - -[TIP] -==== -When creating an Elasticsearch API key for query Search Applications, you must include the `search_application_query` restriction. This will ensure the API key can only access the Search Application Search API. -==== - -[TIP] -==== -We recommend always setting an `expiration` time when creating an Elasticsearch API key. When `expiration` is not set, the Elasticsearch API will never expire. -==== - -[discrete#es-dls-e2e-guide-workflow-guidance] -==== Workflow guidance - -We recommend relying on the connector access control sync to automate and keep documents in sync with changes to the original content source's user permissions. - -In this workflow you will need to handle the generation of the Elasticsearch API key in the backend of your application, in response to browser sign ins. - -Once the key is generated, the backend will also need to return that key to the client (browser) to be used in subsequent search requests to your search application. - -The API key can be invalidated using the {ref}/security-api-invalidate-api-key.html[Invalidate API Key API]. -Additionally, if the user's permission changes, you'll need to update or recreate the Elasticsearch API key. - -[discrete#es-dls-e2e-guide-next-steps] -==== Next steps - -Learn how to use the Search Application client to query your Search Application. -See {ref}/search-application-client.html[Search Applications client]. - -[discrete#es-dls-e2e-guide-learn-more] -==== Learn more - -* <> -* <> -* {ref}/search-application-overview.html[Search Applications] diff --git a/docs/reference/connector/docs/dls-overview.asciidoc b/docs/reference/connector/docs/dls-overview.asciidoc deleted file mode 100644 index ec6bb43d955c7..0000000000000 --- a/docs/reference/connector/docs/dls-overview.asciidoc +++ /dev/null @@ -1,345 +0,0 @@ -[#es-dls-overview] -=== How DLS works - -Document level security (DLS) enables you to control access to content at the document level. -Access to each document in an index can be managed independently, based on the identities (such as usernames, emails, groups etc.) that are allowed to view it. - -This feature works with the help of special access control documents that are indexed by a connector into a hidden Elasticsearch index, associated with the standard content index. -If your content documents have access control fields that match the criteria defined in your access control documents, Elasticsearch will apply DLS to the documents synced by the connector. 
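-
-Because documents without access control fields are not restricted, a quick way to see how much of your synced content is covered by DLS is to count the documents that carry the access control field.
-This is only a sketch: `search-sharepoint` stands in for your connector's content index, and `_allow_access_control` is the field name used in the examples below.
-
-[source,console]
-----
-GET search-sharepoint/_count
-{
-  "query": {
-    "exists": {
-      "field": "_allow_access_control"
-    }
-  }
-}
-----
-// TEST[skip:TODO]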
- -[discrete#es-dls-overview-core-concepts] -==== Core concepts - -At a very high level, there are two essential components that enable document level security with connectors: - -* *Access control documents*: These documents define the access control policy for documents from your third party source. -They live in a hidden index named with the following pattern: `.search-acl-filter-`. -See <> for more details and an example. -* *Content documents with access control fields*: The documents that contain the synced content from your third party source must have *access control fields* that match the criteria defined in your access control documents. -These documents live in an index named with the following pattern: `search-`. -** If a content document does not have access control fields, there will be no restrictions on who can view it. -** If the access control field is present but _empty_, no identities will have access and the document will be effectively invisible. -+ -See <> for more details. - -[discrete#es-dls-overview-procedure] -==== Enabling DLS - -To enable DLS, you need to perform the following steps: - -. First *enable DLS* for your connector as part of the connector configuration. -. Run an *Access control* sync. -. This creates a hidden access control index prefixed with `.search-acl-filter-`. For example, if you named your connector index `search-sharepoint`, the access control index would be named `.search-acl-filter-search-sharepoint`. -. The <> on the hidden index define which identities are allowed to view documents with access control fields. -. The access control document uses a search template to define how to filter search results based on identities. -. Schedule recurring *Access control* syncs to update the access control documents in the hidden index. - -Note the following details about content documents and syncs: - -. Remember that for DLS to work, your *content documents* must have access control fields that match the criteria defined in your access control documents. -<> contain the actual content your users will search for. -If a content document does not have access control fields, there will be no restrictions on who can view it. -. When a user searches for content, the access control documents determine which content the user is allowed to view. -. At _search_ time documents without the `_allow_access_control` field or with allowed values in `_allow_access_control.enum` will be returned in the search results. The logic for determining whether a document has access control enabled is based on the presence or values of the `_allow_access_control*` fields. -. Run *Content* syncs to sync your third party data source to Elasticsearch. -A specific field (or fields) within these documents correlates with the query parameters in the access control documents enabling document-level security (DLS). - -[NOTE] -==== -You must enable DLS for your connector _before_ running the first content sync. -If you have already run a content sync, you'll need to delete all documents on the index, enable DLS, and run a new content sync. -==== - -[discrete#es-dls-overview-index] -==== DLS at index time - -[discrete#es-dls-overview-access-control-documents] -===== Access control documents - -These documents define the access control policy for the data indexed into Elasticsearch. 
-An example of an access control document is as follows: - -[source,js] ----- -{ - "_id": "example.user@example.com", - "identity": { - "username": "example username", - "email": "example.user@example.com" - }, - "query": { - "template": { - "params": { - "access_control": [ - "example.user@example.com", - "example group", - "example username"] - } - }, - "source": "..." - } -} ----- -// NOTCONSOLE - -In this example, the identity object specifies the identity of the user that this document pertains to. -The `query` object then uses a template to list the parameters that form the access control policy for this identity. -It also contains the query `source`, which will specify a query to fetch all content documents the identity has access to. -The `_id` could be, for example, the email address or the username of a user. -The exact content and structure of `identity` depends on the corresponding implementation. - -[discrete#es-dls-overview-content-documents] -===== Content documents - -Content documents contain the actual data from your 3rd party source. -A specific field (or fields) within these documents correlates with the query parameters in the access control documents enabling document-level security (DLS). -Please note, the field names used to implement DLS may vary across different connectors. -In the following example we'll use the field `_allow_access_control` for specifying the access control for a user identity. - -[source,js] ----- -{ - "_id": "some-unique-id", - "key-1": "value-1", - "key-2": "value-2", - "key-3": "value-3", - "_allow_access_control": [ - "example.user@example.com", - "example group", - "example username" - ] -} ----- -// NOTCONSOLE - -[discrete#es-dls-overview-sync-type-comparison] -===== Access control sync vs content sync - -The ingestion of documents into an Elasticsearch index is known as a sync. -DLS is managed using two types of syncs: - -* *Content sync*: Ingests content into an index that starts with `search-`. - -* *Access control sync*: Separate, additional sync which ingests access control documents into index that starts with `.search-acl-filter-`. - -During a sync, the connector ingests the documents into the relevant index based on their type (content or access control). -The access control documents determine the access control policy for the content documents. - -By leveraging DLS, you can ensure that your Elasticsearch data is securely accessible to the right users or groups, based on the permissions defined in the access control documents. - -[discrete#es-dls-overview-search-time] -==== DLS at search time - -[discrete#es-dls-overview-search-time-identity-allowed] -===== When is an identity allowed to see a content document - -A user can view a document if at least one access control element in their access control document matches an item within the document's `_allow_access_control` field. - -[discrete#es-dls-overview-search-time-example] -====== Example -This section illustrates when a user has access to certain documents depending on the access control. - -One access control document: -[source,js] ----- -{ - "_id": "example.user@example.com", - "identity": { - "username": "example username", - "email": "example.user@example.com" - }, - "query": { - "template": { - "params": { - "access_control": [ - "example.user@example.com", - "example group", - "example username"] - } - }, - "source": "..." - } -} ----- -// NOTCONSOLE - -Let's see which of the following example documents these permissions can access, and why. 
-[source,js] ----- -{ - "_id": "some-unique-id-1", - "_allow_access_control": [ - "example.user@example.com", - "example group", - "example username" - ] -} ----- -// NOTCONSOLE - -The user `example username` will have access to this document as he's part of the corresponding group and his username and email address are also explicitly part of `_allow_access_control`. - -[source,js] ----- -{ - "_id": "some-unique-id-2", - "_allow_access_control": [ - "example group" - ] -} ----- -// NOTCONSOLE - -The user `example username` will also have access to this document as they are part of the `example group`. - -[source,js] ----- -{ - "_id": "some-unique-id-3", - "_allow_access_control": [ - "another.user@example.com" - ] -} ----- -// NOTCONSOLE - -The user `example username` won't have access to this document because their email does not match `another.user@example.com`. - -[source,js] ----- -{ - "_id": "some-unique-id-4", - "_allow_access_control": [] -} ----- -// NOTCONSOLE - -No one will have access to this document as the `_allow_access_control` field is empty. - -[discrete#es-dls-overview-multiple-connectors] -===== Querying multiple indices - -This section illustrates how to define an Elasticsearch API key that has restricted read access to multiple indices that have DLS enabled. - -A user might have multiple identities that define which documents they are allowed to read. -We can define an Elasticsearch API key with a role descriptor for each index the user has access to. - -[discrete#es-dls-overview-multiple-connectors-example] -====== Example - -Let's assume we want to create an API key that combines the following user identities: - -[source,js] ----- -GET .search-acl-filter-source1 -{ - "_id": "example.user@example.com", - "identity": { - "username": "example username", - "email": "example.user@example.com" - }, - "query": { - "template": { - "params": { - "access_control": [ - "example.user@example.com", - "source1-user-group"] - } - }, - "source": "..." - } -} ----- -// NOTCONSOLE - -[source,js] ----- -GET .search-acl-filter-source2 -{ - "_id": "example.user@example.com", - "identity": { - "username": "example username", - "email": "example.user@example.com" - }, - "query": { - "template": { - "params": { - "access_control": [ - "example.user@example.com", - "source2-user-group"] - } - }, - "source": "..." - } -} ----- -// NOTCONSOLE - -`.search-acl-filter-source1` and `.search-acl-filter-source2` define the access control identities for `source1` and `source2`. - -You can create an Elasticsearch API key using an API call like this: - -[source,console] ----- -POST /_security/api_key -{ - "name": "my-api-key", - "role_descriptors": { - "role-source1": { - "indices": [ - { - "names": ["source1"], - "privileges": ["read"], - "query": { - "template": { - "params": { - "access_control": [ - "example.user@example.com", - "source1-user-group"] - } - }, - "source": "..." - } - } - ] - }, - "role-source2": { - "indices": [ - { - "names": ["source2"], - "privileges": ["read"], - "query": { - "template": { - "params": { - "access_control": [ - "example.user@example.com", - "source2-user-group"] - } - }, - "source": "..." - } - } - ] - } - } -} - ----- -// TEST[skip:TODO] - -[discrete#es-dls-overview-multiple-connectors-workflow-guidance] -====== Workflow guidance - -We recommend relying on the connector access control sync to automate and keep documents in sync with changes to the original content source's user permissions. 
- -Consider setting an `expiration` time when creating an Elasticsearch API key. When `expiration` is not set, the Elasticsearch API key will never expire. - -The API key can be invalidated using the {ref}/security-api-invalidate-api-key.html[Invalidate API Key API]. -Additionally, if the user's permission changes, you'll need to update or recreate the Elasticsearch API key. - -[discrete#es-dls-overview-search-time-learn-more] -===== Learn more - -* <> -* {ref}/document-level-security.html[Elasticsearch Document Level Security^] - diff --git a/docs/reference/connector/docs/dls.asciidoc b/docs/reference/connector/docs/dls.asciidoc deleted file mode 100644 index 2e8871ea9eb87..0000000000000 --- a/docs/reference/connector/docs/dls.asciidoc +++ /dev/null @@ -1,39 +0,0 @@ -[#es-dls] -== Document level security - -Document level security (DLS) enables you to restrict access to documents in your Elasticsearch indices according to user and group permissions. -This ensures search results only return authorized information for users, based on their permissions. - -[discrete#es-dls-availability-prerequisites] -=== Availability & prerequisites - -Support for DLS in Elastic connectors was introduced in version *8.9.0*. - -[NOTE] -==== -This feature is in *beta* and is subject to change. -The design and code are less mature than official GA features and is being provided as-is with no warranties. -Beta features are not subject to the support SLA of official GA features. -==== - -This feature is not available for all Elastic subscription levels. -Refer to the subscriptions pages for https://www.elastic.co/subscriptions/cloud[Elastic Cloud^] and https://www.elastic.co/subscriptions[Elastic Stack^]. - -DLS is available by default when using the following Elastic connectors: - -include::_connectors-list-dls.asciidoc[] - -Note that our standalone products (App Search and Workplace Search) do not use this feature. -Workplace Search has its own permissions management system.
- -[discrete#es-dls-learn-more] -=== Learn more - -DLS documentation: - -* <> -* <> -* <> - -include::dls-overview.asciidoc[] -include::dls-e2e-guide.asciidoc[] diff --git a/docs/reference/connector/docs/images/analytics-collections-dashboard.png b/docs/reference/connector/docs/images/analytics-collections-dashboard.png deleted file mode 100644 index b99fc07bc5fab..0000000000000 Binary files a/docs/reference/connector/docs/images/analytics-collections-dashboard.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/analytics-explorer-dashboard.png b/docs/reference/connector/docs/images/analytics-explorer-dashboard.png deleted file mode 100644 index 922763585d67f..0000000000000 Binary files a/docs/reference/connector/docs/images/analytics-explorer-dashboard.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/analytics-overview-dashboard.png b/docs/reference/connector/docs/images/analytics-overview-dashboard.png deleted file mode 100644 index c088cd3994d1e..0000000000000 Binary files a/docs/reference/connector/docs/images/analytics-overview-dashboard.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/app-search-audit-log-table.png b/docs/reference/connector/docs/images/app-search-audit-log-table.png deleted file mode 100644 index ccf9147bdb6e8..0000000000000 Binary files a/docs/reference/connector/docs/images/app-search-audit-log-table.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/app-search-kibana-ui.png b/docs/reference/connector/docs/images/app-search-kibana-ui.png deleted file mode 100644 index 0e6b09b7f1bba..0000000000000 Binary files a/docs/reference/connector/docs/images/app-search-kibana-ui.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/app-search-settings.png b/docs/reference/connector/docs/images/app-search-settings.png deleted file mode 100644 index 9c8c31c81a6c5..0000000000000 Binary files a/docs/reference/connector/docs/images/app-search-settings.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/app-search-standalone-ui.png b/docs/reference/connector/docs/images/app-search-standalone-ui.png deleted file mode 100644 index f496d831b70ad..0000000000000 Binary files a/docs/reference/connector/docs/images/app-search-standalone-ui.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/basic-rule-example.png b/docs/reference/connector/docs/images/basic-rule-example.png deleted file mode 100644 index aa1d79bb6f274..0000000000000 Binary files a/docs/reference/connector/docs/images/basic-rule-example.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/blog-elastic-crawler-1.png b/docs/reference/connector/docs/images/blog-elastic-crawler-1.png deleted file mode 100644 index e2e5593c3e102..0000000000000 Binary files a/docs/reference/connector/docs/images/blog-elastic-crawler-1.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/blog-elastic-crawler-2.jpg b/docs/reference/connector/docs/images/blog-elastic-crawler-2.jpg deleted file mode 100644 index 3bc45743afbd0..0000000000000 Binary files a/docs/reference/connector/docs/images/blog-elastic-crawler-2.jpg and /dev/null differ diff --git a/docs/reference/connector/docs/images/blog-elastic-crawler-3.jpg b/docs/reference/connector/docs/images/blog-elastic-crawler-3.jpg deleted file mode 100644 index 6f7f4fe5c4b6d..0000000000000 Binary files a/docs/reference/connector/docs/images/blog-elastic-crawler-3.jpg and /dev/null differ diff 
--git a/docs/reference/connector/docs/images/build-a-connector-workflow.png b/docs/reference/connector/docs/images/build-a-connector-workflow.png deleted file mode 100644 index eb51863358e9a..0000000000000 Binary files a/docs/reference/connector/docs/images/build-a-connector-workflow.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/combine-engines-indices-content-sources.png b/docs/reference/connector/docs/images/combine-engines-indices-content-sources.png deleted file mode 100644 index 072f4cefff01b..0000000000000 Binary files a/docs/reference/connector/docs/images/combine-engines-indices-content-sources.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/connectors-overview.svg b/docs/reference/connector/docs/images/connectors-overview.svg deleted file mode 100644 index 0a7fb30c61d6d..0000000000000 --- a/docs/reference/connector/docs/images/connectors-overview.svg +++ /dev/null @@ -1,70 +0,0 @@ diff --git a/docs/reference/connector/docs/images/convert-connector.png b/docs/reference/connector/docs/images/convert-connector.png deleted file mode 100644 index f07886d12d7fb..0000000000000 Binary files a/docs/reference/connector/docs/images/convert-connector.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/crawler-crawl-rules.png b/docs/reference/connector/docs/images/crawler-crawl-rules.png deleted file mode 100644 index 69c97418189d3..0000000000000 Binary files a/docs/reference/connector/docs/images/crawler-crawl-rules.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/crawler-events-logs-viewer-by-url.png b/docs/reference/connector/docs/images/crawler-events-logs-viewer-by-url.png deleted file mode 100644 index 2f05747d49398..0000000000000 Binary files a/docs/reference/connector/docs/images/crawler-events-logs-viewer-by-url.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/crawler-events-logs-viewer.png b/docs/reference/connector/docs/images/crawler-events-logs-viewer.png deleted file mode 100644 index 758b94e808661..0000000000000 Binary files a/docs/reference/connector/docs/images/crawler-events-logs-viewer.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/crawler-extraction-rules-content-field.png b/docs/reference/connector/docs/images/crawler-extraction-rules-content-field.png deleted file mode 100644 index 2b7b9f3d41cd9..0000000000000 Binary files a/docs/reference/connector/docs/images/crawler-extraction-rules-content-field.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/crawler-extraction-rules-url-filters.png b/docs/reference/connector/docs/images/crawler-extraction-rules-url-filters.png deleted file mode 100644 index 11be61bcce8fa..0000000000000 Binary files a/docs/reference/connector/docs/images/crawler-extraction-rules-url-filters.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/crawler-extraction-rules.png b/docs/reference/connector/docs/images/crawler-extraction-rules.png deleted file mode 100644 index 175f18e2eaf66..0000000000000 Binary files a/docs/reference/connector/docs/images/crawler-iteration-cycle.png b/docs/reference/connector/docs/images/crawler-iteration-cycle.png deleted file mode 100644 index f013bd2ed0dcd..0000000000000 Binary files
a/docs/reference/connector/docs/images/crawler-iteration-cycle.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/crawler-proxy-schematic.png b/docs/reference/connector/docs/images/crawler-proxy-schematic.png deleted file mode 100644 index 524182d2f6643..0000000000000 Binary files a/docs/reference/connector/docs/images/crawler-proxy-schematic.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/crawler-proxy-validation.png b/docs/reference/connector/docs/images/crawler-proxy-validation.png deleted file mode 100644 index 61f268f83f209..0000000000000 Binary files a/docs/reference/connector/docs/images/crawler-proxy-validation.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/crawler-scheduling-multiple-crawls.png b/docs/reference/connector/docs/images/crawler-scheduling-multiple-crawls.png deleted file mode 100644 index fcddae8dd1d04..0000000000000 Binary files a/docs/reference/connector/docs/images/crawler-scheduling-multiple-crawls.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/crawler-scheduling.png b/docs/reference/connector/docs/images/crawler-scheduling.png deleted file mode 100644 index f67a0d6b5fb5d..0000000000000 Binary files a/docs/reference/connector/docs/images/crawler-scheduling.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/discover-data-view-analytics.png b/docs/reference/connector/docs/images/discover-data-view-analytics.png deleted file mode 100644 index 676ed40098e99..0000000000000 Binary files a/docs/reference/connector/docs/images/discover-data-view-analytics.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/discover-lens-analytics.png b/docs/reference/connector/docs/images/discover-lens-analytics.png deleted file mode 100644 index 89701eca60bad..0000000000000 Binary files a/docs/reference/connector/docs/images/discover-lens-analytics.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/document-enrichment-add-inference-pipeline.png b/docs/reference/connector/docs/images/document-enrichment-add-inference-pipeline.png deleted file mode 100644 index ddcf42e24ab83..0000000000000 Binary files a/docs/reference/connector/docs/images/document-enrichment-add-inference-pipeline.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/document-enrichment-diagram.png b/docs/reference/connector/docs/images/document-enrichment-diagram.png deleted file mode 100644 index 89ae1d45e24d4..0000000000000 Binary files a/docs/reference/connector/docs/images/document-enrichment-diagram.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/elser-deploy-model.png b/docs/reference/connector/docs/images/elser-deploy-model.png deleted file mode 100644 index 46f5e8cc7229a..0000000000000 Binary files a/docs/reference/connector/docs/images/elser-deploy-model.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/elser-model-deployment.png b/docs/reference/connector/docs/images/elser-model-deployment.png deleted file mode 100644 index 1bcae4c85a5e1..0000000000000 Binary files a/docs/reference/connector/docs/images/elser-model-deployment.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/elser-model-started.png b/docs/reference/connector/docs/images/elser-model-started.png deleted file mode 100644 index c533f7b5123fb..0000000000000 Binary files a/docs/reference/connector/docs/images/elser-model-started.png and /dev/null differ diff --git 
a/docs/reference/connector/docs/images/elser-pipeline-model-selection.png b/docs/reference/connector/docs/images/elser-pipeline-model-selection.png deleted file mode 100644 index 986071e77b36a..0000000000000 Binary files a/docs/reference/connector/docs/images/elser-pipeline-model-selection.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/elser-start-model.png b/docs/reference/connector/docs/images/elser-start-model.png deleted file mode 100644 index 81cdfa0eb58a0..0000000000000 Binary files a/docs/reference/connector/docs/images/elser-start-model.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/enable-rbac-app-search.png b/docs/reference/connector/docs/images/enable-rbac-app-search.png deleted file mode 100644 index 11ef21d55f07f..0000000000000 Binary files a/docs/reference/connector/docs/images/enable-rbac-app-search.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/enable-rbac-workplace-search.png b/docs/reference/connector/docs/images/enable-rbac-workplace-search.png deleted file mode 100644 index 45205d23cddfd..0000000000000 Binary files a/docs/reference/connector/docs/images/enable-rbac-workplace-search.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/kibana-setup-guide.png b/docs/reference/connector/docs/images/kibana-setup-guide.png deleted file mode 100644 index 2797472933102..0000000000000 Binary files a/docs/reference/connector/docs/images/kibana-setup-guide.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/kibana-ui.png b/docs/reference/connector/docs/images/kibana-ui.png deleted file mode 100644 index 4371f3a1052aa..0000000000000 Binary files a/docs/reference/connector/docs/images/kibana-ui.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/ldap-login.png b/docs/reference/connector/docs/images/ldap-login.png deleted file mode 100644 index b7dd2b9fce5fb..0000000000000 Binary files a/docs/reference/connector/docs/images/ldap-login.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/oidc-login.png b/docs/reference/connector/docs/images/oidc-login.png deleted file mode 100644 index 37753acc8a0f6..0000000000000 Binary files a/docs/reference/connector/docs/images/oidc-login.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/pipeline-copy-customize.png b/docs/reference/connector/docs/images/pipeline-copy-customize.png deleted file mode 100644 index 1f2bf99aa4f16..0000000000000 Binary files a/docs/reference/connector/docs/images/pipeline-copy-customize.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/pki-login-screen.png b/docs/reference/connector/docs/images/pki-login-screen.png deleted file mode 100644 index 9fec19564adb3..0000000000000 Binary files a/docs/reference/connector/docs/images/pki-login-screen.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/saml-login.png b/docs/reference/connector/docs/images/saml-login.png deleted file mode 100644 index f8d5771363efc..0000000000000 Binary files a/docs/reference/connector/docs/images/saml-login.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/search-applications-create.png b/docs/reference/connector/docs/images/search-applications-create.png deleted file mode 100644 index cce31b985ad82..0000000000000 Binary files a/docs/reference/connector/docs/images/search-applications-create.png and /dev/null differ diff --git 
a/docs/reference/connector/docs/images/search-applications-docs-explorer.png b/docs/reference/connector/docs/images/search-applications-docs-explorer.png deleted file mode 100644 index d9b2cfa05f986..0000000000000 Binary files a/docs/reference/connector/docs/images/search-applications-docs-explorer.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/search-applications-unified-search.png b/docs/reference/connector/docs/images/search-applications-unified-search.png deleted file mode 100644 index 2eca235b2d968..0000000000000 Binary files a/docs/reference/connector/docs/images/search-applications-unified-search.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/select-ingestion-method.png b/docs/reference/connector/docs/images/select-ingestion-method.png deleted file mode 100644 index 29dc3630e1237..0000000000000 Binary files a/docs/reference/connector/docs/images/select-ingestion-method.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/simple-rule-equals.png b/docs/reference/connector/docs/images/simple-rule-equals.png deleted file mode 100644 index 5dd5e43427ea5..0000000000000 Binary files a/docs/reference/connector/docs/images/simple-rule-equals.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/sync-rules-detail-view-button.png b/docs/reference/connector/docs/images/sync-rules-detail-view-button.png deleted file mode 100644 index f24daea0d4351..0000000000000 Binary files a/docs/reference/connector/docs/images/sync-rules-detail-view-button.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/sync-rules-new-rule-applied.png b/docs/reference/connector/docs/images/sync-rules-new-rule-applied.png deleted file mode 100644 index 936b7b98b7cda..0000000000000 Binary files a/docs/reference/connector/docs/images/sync-rules-new-rule-applied.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/workplace-search-create-role-mapping.png b/docs/reference/connector/docs/images/workplace-search-create-role-mapping.png deleted file mode 100644 index 92c22b80ea30d..0000000000000 Binary files a/docs/reference/connector/docs/images/workplace-search-create-role-mapping.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/workplace-search-kibana-ui.png b/docs/reference/connector/docs/images/workplace-search-kibana-ui.png deleted file mode 100644 index ea5f4fcfca4df..0000000000000 Binary files a/docs/reference/connector/docs/images/workplace-search-kibana-ui.png and /dev/null differ diff --git a/docs/reference/connector/docs/images/workplace-search-standalone-ui.png b/docs/reference/connector/docs/images/workplace-search-standalone-ui.png deleted file mode 100644 index b3a1e12e63fa3..0000000000000 Binary files a/docs/reference/connector/docs/images/workplace-search-standalone-ui.png and /dev/null differ diff --git a/docs/reference/connector/docs/index.asciidoc b/docs/reference/connector/docs/index.asciidoc deleted file mode 100644 index dfca45f86ebce..0000000000000 --- a/docs/reference/connector/docs/index.asciidoc +++ /dev/null @@ -1,130 +0,0 @@ -[#es-connectors] -= Ingest content with Elastic connectors -++++ -Connectors -++++ - -.Connectors documentation history -**** -Please note that the connectors documentation lived in the https://www.elastic.co/guide/en/enterprise-search/8.15/connectors-references.html[Enterprise Search documentation] prior to version 8.16.0. 
-**** - -A _connector_ is a type of https://www.elastic.co/integrations/data-integrations[Elastic integration^] that syncs content from an original data source to an *Elasticsearch index*. -Connectors enable you to create _searchable_, read-only replicas of your data sources. - -[IMPORTANT] -==== -These connectors are focused on general content, which is non-timestamped data. -Refer to https://www.elastic.co/guide/en/cloud/current/ec-cloud-ingest-data.html[add data to {es}] if you're interested in ingesting timestamped data. -==== - -Connectors extract the original files, records, or objects; and transforms them into Elasticsearch documents. - -Many connectors are available out-of-the-box on Elastic Cloud. -You can also access the source code for these (and additional) connectors, and run them on your own infrastructure. - -* **Managed connectors** are available directly within your Elastic Cloud deployment. -* **Self-managed connectors** are self-managed on your own infrastructure. - -[discrete#es-connectors-native] -== Elastic managed connectors {ess-icon} - -_Managed connectors_ are available directly within your Elastic Cloud deployment, as a managed service. -No additional infrastructure is required. - -Please note that Elastic managed connectors were previously known as "native connectors". - -Refer to <> for details on how to configure and use Elastic managed connectors. - -.*Expand* for list of available Elastic managed connectors -[%collapsible] -==== -include::_connectors-list-native.asciidoc[] -==== - -[discrete#es-connectors-build] -== Self-managed connectors - -Self-managed connectors enable you to run connectors locally on your own infrastructure. -This means you can try out new connectors before they are available natively within Elastic Cloud, and/or customize existing connectors. - -Please note that self-managed connectors were previously known as "connector clients". - -Refer to <> for details on how to deploy self-managed connectors. - -.*Expand* for list of available self-managed connectors -[%collapsible] -==== -include::_connectors-list-clients.asciidoc[] -==== - -[discrete#es-connectors-overview-framework] -== Connector framework - -All Elastic connectors are built using our Python connector framework. -The source code is available in the {connectors-python}[`elastic/connectors`] repository on GitHub. - -The connector framework is available for developers to customize existing self-managed connectors or build their own connectors. -Refer to <> for details. - -[discrete#es-connectors-overview-diagram] -== Connectors overview diagram - -The following diagram provides a high-level overview of the Elastic connectors offering and some key facts. 
- -image::connectors-overview.svg[align="center",width="100%"] - -[discrete#es-connectors-overview-available-connectors] -== Available connectors and feature support - -include::_connectors-overview-table.asciidoc[] - - -:connectors-branch: {branch} - -ifeval::['{branch}' == 'master'] -:connectors-branch: main -endif::[] - -:connectors-python: https://github.com/elastic/connectors/tree/{connectors-branch} -:connectors-ruby: https://github.com/elastic/connectors-ruby/tree/{connectors-branch} - -include::connectors-refs.asciidoc[] - - -include::connectors-self-managed.asciidoc[] -include::connectors-run-from-docker.asciidoc[] -include::connectors-run-from-source.asciidoc[] -include::connectors-docker-compose-quickstart.asciidoc[] -include::postgresql-connector-client-tutorial.asciidoc[] - - -include::connectors-managed-service.asciidoc[] -include::connectors-hosted-tutorial-mongo.asciidoc[] - -include::connectors-framework.asciidoc[] - -include::connectors-usage.asciidoc[] - -include::connectors-APIs.asciidoc[] -include::connectors-API-tutorial.asciidoc[] -include::connectors-content-syncs.asciidoc[] -include::connectors-filter-extract-transform.asciidoc[] -include::connectors-content-extraction.asciidoc[] -include::sync-rules.asciidoc[] - -include::dls.asciidoc[] - - -include::connectors-management.asciidoc[] -include::connectors-scalability.asciidoc[] -include::connectors-security.asciidoc[] -include::connectors-troubleshooting.asciidoc[] -include::connectors-logs.asciidoc[] - -include::connectors-use-cases.asciidoc[] - - -include::connectors-release-notes.asciidoc[] -include::connectors-known-issues.asciidoc[] - diff --git a/docs/reference/connector/docs/postgresql-connector-client-tutorial.asciidoc b/docs/reference/connector/docs/postgresql-connector-client-tutorial.asciidoc deleted file mode 100644 index 3a3ab242a47aa..0000000000000 --- a/docs/reference/connector/docs/postgresql-connector-client-tutorial.asciidoc +++ /dev/null @@ -1,234 +0,0 @@ -[#es-postgresql-connector-client-tutorial] -=== PostgreSQL self-managed connector tutorial -++++ -Tutorial -++++ - -This tutorial walks you through the process of creating a self-managed connector for a PostgreSQL data source. -You'll be using the <> workflow in the Kibana UI. -This means you'll be deploying the connector on your own infrastructure. -Refer to the <> for more information about this connector. - -You'll use the {connectors-python}[connector framework^] to create the connector. -In this exercise, you'll be working in both the terminal (or your IDE) and the Kibana UI. - -If you want to deploy a self-managed connector for another data source, use this tutorial as a blueprint. -Refer to the list of available <>. - -[TIP] -==== -Want to get started quickly testing a self-managed connector using Docker Compose? -Refer to this https://github.com/elastic/connectors/tree/main/scripts/stack#readme[README] in the `elastic/connectors` repo for more information. -==== - -[discrete#es-postgresql-connector-client-tutorial-prerequisites] -==== Prerequisites - -[discrete#es-postgresql-connector-client-tutorial-prerequisites-elastic] -===== Elastic prerequisites - -First, ensure you satisfy the <> for self-managed connectors. - -[discrete#es-postgresql-connector-client-tutorial-postgresql-prerequisites] -===== PostgreSQL prerequisites - -You need: - -* PostgreSQL version 11+. -* Tables must be owned by a PostgreSQL user. -* Database `superuser` privileges are required to index all database tables. 
- -[TIP] -==== -You should enable recording of the commit time of PostgreSQL transactions. -Otherwise, _all_ data will be indexed in every sync. -By default, `track_commit_timestamp` is `off`. - -Enable this by running the following command on the PosgreSQL server command line: - -[source,shell] ----- -ALTER SYSTEM SET track_commit_timestamp = on; ----- - -Then restart the PostgreSQL server. -==== - -[discrete#es-postgresql-connector-client-tutorial-steps] -==== Steps - -To complete this tutorial, you'll need to complete the following steps: - -. <> -. <> -. <> -. <> - -[discrete#es-postgresql-connector-client-tutorial-create-index] -==== Create an Elasticsearch index - -Elastic connectors enable you to create searchable, read-only replicas of your data sources in Elasticsearch. -The first step in setting up your self-managed connector is to create an index. - -In the {kibana-ref}[Kibana^] UI, navigate to *Search > Content > Elasticsearch indices* from the main menu, or use the {kibana-ref}/kibana-concepts-analysts.html#_finding_your_apps_and_objects[global search field]. - -Create a new connector index: - -. Under *Select an ingestion method* choose *Connector*. -. Choose *PostgreSQL* from the list of connectors. -. Name your index and optionally change the language analyzer to match the human language of your data source. -(The index name you provide is automatically prefixed with `search-`.) -. Save your changes. - -The index is created and ready to configure. - -[discrete#es-postgresql-connector-client-tutorial-gather-elastic-details] -.Gather Elastic details -**** -Before you can configure the connector, you need to gather some details about your Elastic deployment: - -* *Elasticsearch endpoint*. -** If you're an Elastic Cloud user, find your deployment’s Elasticsearch endpoint in the Cloud UI under *Cloud > Deployments > > Elasticsearch*. -** If you're running your Elastic deployment and the connector service in Docker, the default Elasticsearch endpoint is `http://host.docker.internal:9200`. -* *API key.* -You'll need this key to configure the connector. -Use an existing key or create a new one. -* *Connector ID*. -Your unique connector ID is automatically generated when you create the connector. -Find this in the Kibana UI. -**** - -[discrete#es-postgresql-connector-client-tutorial-setup-connector] -==== Set up the connector - -Once you've created an index, you can set up the connector. -You will be guided through this process in the UI. - -. *Edit the name and description for the connector.* -This will help your team identify the connector. -. *Clone and edit the connector service code.* -For this example, we'll use the {connectors-python}[Python framework^]. -Follow these steps: -** Clone or fork that repository locally with the following command: `git clone https://github.com/elastic/connectors`. -** Open the `config.yml` configuration file in your editor of choice. -** Replace the values for `host`, `api_key`, and `connector_id` with the values you gathered <>. -Use the `service_type` value `postgresql` for this connector. -+ -.*Expand* to see an example `config.yml` file -[%collapsible] -==== -Replace the values for `host`, `api_key`, and `connector_id` with your own values. -Use the `service_type` value `postgresql` for this connector. -[source,yaml] ----- -elasticsearch: - host: > # Your Elasticsearch endpoint - api_key: '' # Your top-level Elasticsearch API key -... -connectors: - - - connector_id: "" - api_key: "'" # Your scoped connector index API key (optional). 
If not provided, the top-level API key is used. - service_type: "postgresql" - - - -# Self-managed connector settings -connector_id: '' # Your connector ID -service_type: 'postgresql' # The service type for your connector - -sources: - # mongodb: connectors.sources.mongo:MongoDataSource - # s3: connectors.sources.s3:S3DataSource - # dir: connectors.sources.directory:DirectoryDataSource - # mysql: connectors.sources.mysql:MySqlDataSource - # network_drive: connectors.sources.network_drive:NASDataSource - # google_cloud_storage: connectors.sources.google_cloud_storage:GoogleCloudStorageDataSource - # azure_blob_storage: connectors.sources.azure_blob_storage:AzureBlobStorageDataSource - postgresql: connectors.sources.postgresql:PostgreSQLDataSource - # oracle: connectors.sources.oracle:OracleDataSource - # sharepoint: connectors.sources.sharepoint:SharepointDataSource - # mssql: connectors.sources.mssql:MSSQLDataSource - # jira: connectors.sources.jira:JiraDataSource ----- -==== - -[discrete#es-postgresql-connector-client-tutorial-run-connector-service] -==== Run the connector service - -Now that you've configured the connector code, you can run the connector service. - -In your terminal or IDE: - -. `cd` into the root of your `connectors` clone/fork. -. Run the following command: `make run`. - -The connector service should now be running. -The UI will let you know that the connector has successfully connected to Elasticsearch. - -Here we're working locally. -In production setups, you'll deploy the connector service to your own infrastructure. -If you prefer to use Docker, refer to the {connectors-python}/docs/DOCKER.md[repo docs^] for instructions. - -[discrete#es-postgresql-connector-client-tutorial-sync-data-source] -==== Sync your PostgreSQL data source - -[discrete#es-postgresql-connector-client-tutorial-sync-data-source-details] -===== Enter your PostgreSQL data source details - -Once you've configured the connector, you can use it to index your data source. - -You can now enter your PostgreSQL instance details in the Kibana UI. - -Enter the following information: - -* *Host*. -Server host address for your PostgreSQL instance. -* *Port*. -Port number for your PostgreSQL instance. -* *Username*. -Username of the PostgreSQL account. -* *Password*. -Password for that user. -* *Database*. -Name of the PostgreSQL database. -* *Comma-separated list of tables*. -`*` will fetch data from all tables in the configured database. - -Once you've entered all these details, select *Save configuration*. - -[discrete#es-postgresql-connector-client-tutorial-sync-data-source-launch-sync] -===== Launch a sync - -If you navigate to the *Overview* tab in the Kibana UI, you can see the connector's _ingestion status_. -This should now have changed to *Configured*. - -It's time to launch a sync by selecting the *Sync* button. - -If you navigate to the terminal window where you're running the connector service, you should see output like the following: - -[source,shell] ----- -[FMWK][13:22:26][INFO] Fetcher -[FMWK][13:22:26][INF0] Fetcher -[FMWK][13:22:26][INFO] Fetcher -... -[FMWK][23:22:28][INF0] [oRXQwYYBLhXTs-qYpJ9i] Sync done: 3864 indexed, 0 deleted. -(27 seconds) ----- - -This confirms the connector has fetched records from your PostgreSQL table(s) and transformed them into documents in your Elasticsearch index. - -Verify your Elasticsearch documents in the *Documents* tab in the Kibana UI. - -If you're happy with the results, set a recurring sync schedule in the *Scheduling* tab. 
-This will ensure your _searchable_ data in Elasticsearch is always up to date with changes to your PostgreSQL data source. - -[discrete#es-postgresql-connector-client-tutorial-learn-more] -==== Learn more - -* <> -* {connectors-python}[Elastic connector framework repository^] -* <> -* <> -* <> \ No newline at end of file diff --git a/docs/reference/connector/docs/sync-rules.asciidoc b/docs/reference/connector/docs/sync-rules.asciidoc deleted file mode 100644 index 3ab72093666b8..0000000000000 --- a/docs/reference/connector/docs/sync-rules.asciidoc +++ /dev/null @@ -1,333 +0,0 @@ -[#es-sync-rules] -=== Connector sync rules -++++ -Sync rules -++++ - -Use connector sync rules to help control which documents are synced between the third-party data source and Elasticsearch. -Define sync rules in the Kibana UI for each connector index, under the `Sync rules` tab for the index. - -Sync rules apply to <> and <>. - -[discrete#es-sync-rules-availability-prerequisites] -==== Availability and prerequisites - -In Elastic versions *8.8.0 and later* all connectors have support for _basic_ sync rules. - -Some connectors support _advanced_ sync rules. -Learn more in the <>. - -[discrete#es-sync-rules-types] -==== Types of sync rule - -There are two types of sync rule: - -* **Basic sync rules** - these rules are represented in a table-like view. -Basic sync rules are identical for all connectors. -* **Advanced sync rules** - these rules cover complex query-and-filter scenarios that cannot be expressed with basic sync rules. -Advanced sync rules are defined through a _source-specific_ DSL JSON snippet. - -[.screenshot] -image::images/filtering-rules-zero-state.png[Sync rules tab] - -[discrete#es-sync-rules-general-filtering] -==== General data filtering concepts - -Before discussing sync rules, it's important to establish a basic understanding of _data filtering_ concepts. -The following diagram shows that data filtering can occur in several different processes/locations. - -[.screenshot] -image::images/filtering-general-diagram.png[Filtering] - -In this documentation we will focus on remote and integration filtering. -Sync rules can be used to modify both of these. - -[discrete#es-sync-rules-general-filtering-remote] -===== Remote filtering - -Data might be filtered at its source. -We call this *remote filtering*, as the filtering process is external to Elastic. - -[discrete#es-sync-rules-general-filtering-integration] -===== Integration filtering - -*Integration filtering* acts as a bridge between the original data source and Elasticsearch. -Filtering that takes place in connectors is an example of integration filtering. - -[discrete#es-sync-rules-general-filtering-pipeline] -===== Pipeline filtering - -Finally, Elasticsearch can filter data right _before persistence_ using {ref}/ingest-pipeline-search.html[ingest pipelines]. -We will not focus on ingest pipeline filtering in this guide. - -[NOTE] -==== -Currently, basic sync rules are the only way to control _integration filtering_ for connectors. -Remember that remote filtering extends far beyond the scope of connectors alone. -For best results, collaborate with the owners and maintainers of your data source. -Ensure the source data is well-organized and optimized for the query types made by the connectors. -==== - -[discrete#es-sync-rules-overview] -==== Sync rules overview - -In most cases, your data lake will contain far more data than you want to expose to end users. 
-For example, you may want to search a product catalog, but not include vendor contact information, even if the two are co-located for business purposes. - -The optimal time to filter data is _early_ in the data pipeline. -There are two main reasons: - -* *Performance*: -It's more efficient to send a query to the backing data source than to obtain all the data and then filter it in the connector. -It's faster to send a smaller dataset over a network and to process it on the connector side. -* *Security*: -Query-time filtering is applied on the data source side, so the data is not sent over the network and into the connector, which limits the exposure of your data. - -In a perfect world, all filtering would be done as remote filtering. - -In practice, however, this is not always possible. -Some sources do not allow robust remote filtering. -Others do, but require special setup (building indexes on specific fields, tweaking settings, etc.) that may require attention from other stakeholders in your organization. - -With this in mind, sync rules were designed to modify both remote filtering and integration filtering. -Your goal should be to do as much remote filtering as possible, but integration is a perfectly viable fall-back. -By definition, remote filtering is applied before the data is obtained from a third-party source. -Integration filtering is applied after the data is obtained from a third-party source, but before it is ingested into the Elasticsearch index. - -[NOTE] -==== -All sync rules are applied to a given document _before_ any {ref}/ingest-pipeline-search.html[ingest pipelines] are run on that document. -Therefore, you can use ingest pipelines for any processing that must occur _after_ integration filtering has occurred. -==== - -[NOTE] -==== -If a sync rule is added, edited or removed, it will only take effect after the next full sync. -==== - -[discrete#es-sync-rules-basic] -==== Basic sync rules - -Each basic sync rules can be one of two "policies": `include` and `exclude`. -`Include` rules are used to include the documents that "match" the specified condition. -`Exclude` rules are used to exclude the documents that "match" the specified condition. - -A "match" is determined based on a condition defined by a combination of "field", "rule", and "value". - -The `Field` column should be used to define which field on a given document should be considered. - -[NOTE] -==== -Only top-level fields are supported. -Nested/object fields cannot be referenced with "dot notation". -==== - -The following rules are available in the `Rule` column: - -* `equals` - The field value is equal to the specified value. -* `starts_with` - The field value starts with the specified (string) value. -* `ends_with` - The field value ends with the specified (string) value. -* `contains` - The field value includes the specified (string) value. -* `regex` - The field value matches the specified https://en.wikipedia.org/wiki/Regular_expression[regular expression^]. -* `>` - The field value is greater than the specified value. -* `<` - The field value is less than the specified value. - -Finally, the `Value` column is dependent on: - -* the data type in the specified "field" -* which "rule" was selected. - -For example, if a value of `[A-Z]{2}` might make sense for a `regex` rule, but much less so for a `>` rule. -Similarly, you probably wouldn't have a value of `espresso` when operating on an `ip_address` field, but perhaps you would for a `beverage` field. 
- -[discrete#es-sync-rules-basic-examples] -===== Basic sync rules examples - -[discrete#es-sync-rules-basic-examples-1] -====== Example 1 - -Exclude all documents that have an `ID` field with the value greater than 1000. - -[.screenshot] -image::images/simple-rule-greater.png[Simple greater than rule] - -[discrete#es-sync-rules-basic-examples-2] -====== Example 2 - -Exclude all documents that have a `state` field that matches a specified regex. - -[.screenshot] -image::images/simple-rule-regex.png[Simple regex rule] - -[discrete#es-sync-rules-performance-implications] -===== Performance implications - -- If you're relying solely on basic sync rules in the integration filtering phase the connector will fetch *all* the data from the data source -- For data sources without automatic pagination, or similar optimizations, fetching all the data can lead to memory issues. -For example, loading datasets which are too big to fit in memory at once. - -[NOTE] -==== -The native MongoDB connector provided by Elastic uses pagination and therefore has optimized performance. -Keep in mind that custom community-built self-managed connectors may not have these performance optimizations. -==== - -The following diagrams illustrate the concept of pagination. -A huge data set may not fit into a connector instance's memory. -Splitting data into smaller chunks reduces the risk of out-of-memory errors. - -This diagram illustrates an entire dataset being extracted at once: -[.screenshot] -image::images/sync-rules-extract-all-at-once.png[Extract whole dataset at once] - -By comparison, this diagram illustrates a paginated dataset: - -[.screenshot] -image::images/sync-rules-pagination.png[Pagination] - -[discrete#es-sync-rules-advanced] -==== Advanced sync rules - -[IMPORTANT] -==== -Advanced sync rules overwrite any remote filtering query that could have been inferred from the basic sync rules. -If an advanced sync rule is defined, any defined basic sync rules will be used exclusively for integration filtering. -==== - -Advanced sync rules are only used in remote filtering. -You can think of advanced sync rules as a language-agnostic way to represent queries to the data source. -Therefore, these rules are highly *source-specific*. - -The following connectors support advanced sync rules: - -include::_connectors-list-advanced-rules.asciidoc[] - -Each connector supporting advanced sync rules provides its own DSL to specify rules. -Refer to the documentation for <> for details. - -[discrete#es-interplay-basic-rules-advanced-rules] -==== Combining basic and advanced sync rules - -You can also use basic sync rules and advanced sync rules together to filter a data set. - -The following diagram provides an overview of the order in which advanced sync rules, basic sync rules, and pipeline filtering, are applied to your documents: - -[.screenshot] -image::images/sync-rules-time-dimension.png[Sync Rules: What is applied when?] - -[discrete#es-example-interplay-basic-rules-advanced-rules] -===== Example - -In the following example we want to filter a data set containing apartments to only contain apartments with specific properties. -We'll use basic and advanced sync rules throughout the example. 
- -A sample apartment looks like this in the `.json` format: -[source, js] ----- - { - "id": 1234, - "bedrooms": 3, - "price": 1500, - "address": { - "street": "Street 123", - "government_area": "Area", - "country_information": { - "country_code": "PT", - "country": "Portugal" - } - } -} ----- -// NOTCONSOLE - -The target data set should fulfill the following conditions: - -. Every apartment should have at least *3 bedrooms* -. The apartments should not be more expensive than *1500 per month* -. The apartment with id '1234' should get included without considering the first two conditions -. Each apartment should be located in either 'Portugal' or 'Spain' - -The first 3 conditions can be handled by basic sync rules, but we'll need to use advanced sync rules for number 4. - -[discrete#es-example-interplay-basic-rules] -====== Basic sync rules examples - -To create a new basic sync rule, navigate to the 'Sync Rules' tab and select *Draft new sync rules*: - -[.screenshot] -image::images/sync-rules-draft-new-rules.png[Draft new rules] - -Afterwards you need to press the 'Save and validate draft' button to validate these rules. -Note that when saved the rules will be in _draft_ state. They won't be executed in the next sync unless they are _applied_. - -[.screenshot] -image::images/sync-rules-save-and-validate-draft.png[Save and validate draft] - -After a successful validation you can apply your rules so they'll be executed in the next sync. - -These following conditions can be covered by basic sync rules: - -1. The apartment with id '1234' should get included without considering the first two conditions -2. Every apartment should have at least three bedrooms -3. The apartments should not be more expensive than 1000/month - -[.screenshot] -image::images/sync-rules-rules-fulfilling-properties.png[Save and validate draft] - -[NOTE] -==== -Remember that order matters for basic sync rules. -You may get different results for a different ordering. -==== - -[discrete#es-example-interplay-advanced-rules] -====== Advanced sync rules example - -You want to only include apartments which are located in Portugal or Spain. -We need to use advanced sync rules here, because we're dealing with deeply nested objects. - -Let's assume that the apartment data is stored inside a MongoDB instance. -For MongoDB we support https://www.mongodb.com/docs/manual/core/aggregation-pipeline/[aggregation pipelines^] in our advanced sync rules among other things. -An aggregation pipeline to only select properties located in Portugal or Spain looks like this: -[source, js] ----- - [ - { - "$match": { - "$or": [ - { - "address.country_information.country": "Portugal" - }, - { - "address.country_information.country": "Spain" - } - ] - } - } - ] ----- -// NOTCONSOLE - -To create these advanced sync rules navigate to the sync rules creation dialog and select the 'Advanced rules' tab. -You can now paste your aggregation pipeline into the input field under `aggregate.pipeline`: - -[.screenshot] -image::images/sync-rules-paste-aggregation-pipeline.png[Paste aggregation pipeline] - -Once validated, apply these rules. 
-The following screenshot shows the applied sync rules, which will be executed in the next sync: - -[.screenshot] -image::images/sync-rules-advanced-rules-appeared.png[Advanced sync rules appeared] - -After a successful sync you can expand the sync details to see which rules were applied: - -[.screenshot] -image::images/sync-rules-applied-rules-during-sync.png[Applied rules during sync] - -[WARNING] -==== -Active sync rules can become invalid when changed outside of the UI. -Sync jobs with invalid rules will fail. -One workaround is to revalidate the draft rules and override the invalid active rules. -==== diff --git a/docs/reference/data-analysis/aggregations/bucket.md b/docs/reference/data-analysis/aggregations/bucket.md new file mode 100644 index 0000000000000..fe37ca814a23f --- /dev/null +++ b/docs/reference/data-analysis/aggregations/bucket.md @@ -0,0 +1,52 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket.html +--- + +# Bucket [search-aggregations-bucket] + +Bucket aggregations don’t calculate metrics over fields like the metrics aggregations do, but instead, they create buckets of documents. Each bucket is associated with a criterion (depending on the aggregation type) which determines whether or not a document in the current context "falls" into it. In other words, the buckets effectively define document sets. In addition to the buckets themselves, the `bucket` aggregations also compute and return the number of documents that "fell into" each bucket. + +Bucket aggregations, as opposed to `metrics` aggregations, can hold sub-aggregations. These sub-aggregations will be aggregated for the buckets created by their "parent" bucket aggregation. + +There are different bucket aggregators, each with a different "bucketing" strategy. Some define a single bucket, some define fixed number of multiple buckets, and others dynamically create the buckets during the aggregation process. + +::::{note} +The [`search.max_buckets`](/reference/elasticsearch/configuration-reference/search-settings.md#search-settings-max-buckets) cluster setting limits the number of buckets allowed in a single response. +:::: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/reference/data-analysis/aggregations/index.md b/docs/reference/data-analysis/aggregations/index.md new file mode 100644 index 0000000000000..b26b69969c4e7 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/index.md @@ -0,0 +1,22 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html +--- + +# Aggregations + +% What needs to be done: Refine + +% Scope notes: need to scope the page down to just reference content + +% Use migrated content from existing pages that map to this page: + +% - [ ] ./raw-migrated-files/elasticsearch/elasticsearch-reference/search-aggregations.md + +Aggregations are a powerful framework that enables you to perform complex data analysis and summarization over indexed documents. They enable you to extract and compute statistics, trends, and patterns from large datasets. + +{{es}} organizes aggregations into three categories: + +* Metric aggregations that calculate metrics, such as a sum or average, from field values. +* Bucket aggregations that group documents into buckets, also called bins, based on field values, ranges, or other criteria. +* Pipeline aggregations that take input from other aggregations instead of documents or fields. 
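To see how the three categories fit together, here is a minimal sketch of a single request (the `sales` index and its `category` and `price` fields are hypothetical and used only for illustration): a `terms` bucket aggregation groups the documents, an `avg` metric aggregation runs inside each bucket, and a `max_bucket` pipeline aggregation reads the per-bucket averages via `buckets_path`.

```console
POST /sales/_search?size=0
{
  "aggs": {
    "by_category": {
      "terms": { "field": "category" },                           <1>
      "aggs": {
        "avg_price": { "avg": { "field": "price" } }              <2>
      }
    },
    "max_avg_price": {
      "max_bucket": { "buckets_path": "by_category>avg_price" }   <3>
    }
  }
}
```

1. Bucket aggregation: creates one bucket per `category` value.
2. Metric aggregation: computes the average `price` inside each bucket.
3. Pipeline aggregation: takes its input from the sibling aggregation's per-bucket `avg_price` values instead of from documents.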
\ No newline at end of file diff --git a/docs/reference/data-analysis/aggregations/metrics.md b/docs/reference/data-analysis/aggregations/metrics.md new file mode 100644 index 0000000000000..1e13c745cf6e2 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/metrics.md @@ -0,0 +1,36 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics.html +--- + +# Metrics [search-aggregations-metrics] + +The aggregations in this family compute metrics based on values extracted in one way or another from the documents that are being aggregated. The values are typically extracted from the fields of the document (using the field data), but can also be generated using scripts. + +Numeric metrics aggregations are a special type of metrics aggregation which output numeric values. Some aggregations output a single numeric metric (e.g. `avg`) and are called `single-value numeric metrics aggregation`, others generate multiple metrics (e.g. `stats`) and are called `multi-value numeric metrics aggregation`. The distinction between single-value and multi-value numeric metrics aggregations plays a role when these aggregations serve as direct sub-aggregations of some bucket aggregations (some bucket aggregations enable you to sort the returned buckets based on the numeric metrics in each bucket). + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/reference/data-analysis/aggregations/pipeline.md b/docs/reference/data-analysis/aggregations/pipeline.md new file mode 100644 index 0000000000000..a6c16b504e5a2 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/pipeline.md @@ -0,0 +1,270 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline.html +--- + +# Pipeline [search-aggregations-pipeline] + +Pipeline aggregations work on the outputs produced from other aggregations rather than from document sets, adding information to the output tree. There are many different types of pipeline aggregation, each computing different information from other aggregations, but these types can be broken down into two families: + +*Parent* +: A family of pipeline aggregations that is provided with the output of its parent aggregation and is able to compute new buckets or new aggregations to add to existing buckets. + +*Sibling* +: Pipeline aggregations that are provided with the output of a sibling aggregation and are able to compute a new aggregation which will be at the same level as the sibling aggregation. + +Pipeline aggregations can reference the aggregations they need to perform their computation by using the `buckets_path` parameter to indicate the paths to the required metrics. The syntax for defining these paths can be found in the [`buckets_path` Syntax](#buckets-path-syntax) section below. + +Pipeline aggregations cannot have sub-aggregations but depending on the type it can reference another pipeline in the `buckets_path` allowing pipeline aggregations to be chained. For example, you can chain together two derivatives to calculate the second derivative (i.e. a derivative of a derivative). + +::::{note} +Because pipeline aggregations only add to the output, when chaining pipeline aggregations the output of each pipeline aggregation will be included in the final output. +:::: + + + +## `buckets_path` Syntax [buckets-path-syntax] + +Most pipeline aggregations require another aggregation as their input. 
The input aggregation is defined via the `buckets_path` parameter, which follows a specific format: + +```ebnf +AGG_SEPARATOR = `>` ; +METRIC_SEPARATOR = `.` ; +AGG_NAME = <the name of the aggregation> ; +METRIC = <the name of the metric (in case of multi-value metrics aggregation)> ; +MULTIBUCKET_KEY = `[<KEY_NAME>]` +PATH = <AGG_NAME><MULTIBUCKET_KEY>? (<AGG_SEPARATOR>, <AGG_NAME>)* ( <METRIC_SEPARATOR>, <METRIC> ) ; +``` + +For example, the path `"my_bucket>my_stats.avg"` will path to the `avg` value in the `"my_stats"` metric, which is contained in the `"my_bucket"` bucket aggregation. + +Here are some more examples: + +* `multi_bucket["foo"]>single_bucket>multi_metric.avg` will go to the `avg` metric in the `"multi_metric"` agg under the single bucket `"single_bucket"` within the `"foo"` bucket of the `"multi_bucket"` multi-bucket aggregation. +* `agg1["foo"]._count` will get the `_count` metric for the `"foo"` bucket in the multi-bucket aggregation `"agg1"`. + +Paths are relative from the position of the pipeline aggregation; they are not absolute paths, and the path cannot go back "up" the aggregation tree. For example, this derivative is embedded inside a date_histogram and refers to a "sibling" metric `"the_sum"`: + +$$$buckets-path-example$$$ + +```console +POST /_search +{ + "aggs": { + "my_date_histo": { + "date_histogram": { + "field": "timestamp", + "calendar_interval": "day" + }, + "aggs": { + "the_sum": { + "sum": { "field": "lemmings" } <1> + }, + "the_deriv": { + "derivative": { "buckets_path": "the_sum" } <2> + } + } + } + } +} +``` + +1. The metric is called `"the_sum"` +2. The `buckets_path` refers to the metric via a relative path `"the_sum"` + + +`buckets_path` is also used for Sibling pipeline aggregations, where the aggregation is "next" to a series of buckets instead of embedded "inside" them. For example, the `max_bucket` aggregation uses the `buckets_path` to specify a metric embedded inside a sibling aggregation: + +$$$buckets-path-sibling-example$$$ + +```console +POST /_search +{ + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + } + } + }, + "max_monthly_sales": { + "max_bucket": { + "buckets_path": "sales_per_month>sales" <1> + } + } + } +} +``` + +1. `buckets_path` instructs this max_bucket aggregation that we want the maximum value of the `sales` aggregation in the `sales_per_month` date histogram. + + +If a Sibling pipeline agg references a multi-bucket aggregation, such as a `terms` agg, it also has the option to select specific keys from the multi-bucket. For example, a `bucket_script` could select two specific buckets (via their bucket keys) to perform the calculation: + +$$$buckets-path-specific-bucket-example$$$ + +```console +POST /_search +{ + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sale_type": { + "terms": { + "field": "type" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + } + } + }, + "hat_vs_bag_ratio": { + "bucket_script": { + "buckets_path": { + "hats": "sale_type['hat']>sales", <1> + "bags": "sale_type['bag']>sales" <1> + }, + "script": "params.hats / params.bags" + } + } + } + } + } +} +``` + +1. `buckets_path` selects the hats and bags buckets (via `['hat']`/`['bag']`) to use in the script specifically, instead of fetching all the buckets from the `sale_type` aggregation. + + + +## Special Paths [_special_paths] + +Instead of pathing to a metric, `buckets_path` can use a special `"_count"` path. This instructs the pipeline aggregation to use the document count as its input.
For example, a derivative can be calculated on the document count of each bucket, instead of a specific metric: + +$$$buckets-path-count-example$$$ + +```console +POST /_search +{ + "aggs": { + "my_date_histo": { + "date_histogram": { + "field": "timestamp", + "calendar_interval": "day" + }, + "aggs": { + "the_deriv": { + "derivative": { "buckets_path": "_count" } <1> + } + } + } + } +} +``` + +1. By using `_count` instead of a metric name, we can calculate the derivative of document counts in the histogram. + + +The `buckets_path` can also use `"_bucket_count"` and path to a multi-bucket aggregation to use the number of buckets returned by that aggregation in the pipeline aggregation instead of a metric. For example, a `bucket_selector` can be used here to filter out buckets which contain no buckets for an inner terms aggregation: + +$$$buckets-path-bucket-count-example$$$ + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "histo": { + "date_histogram": { + "field": "date", + "calendar_interval": "day" + }, + "aggs": { + "categories": { + "terms": { + "field": "category" + } + }, + "min_bucket_selector": { + "bucket_selector": { + "buckets_path": { + "count": "categories._bucket_count" <1> + }, + "script": { + "source": "params.count != 0" + } + } + } + } + } + } +} +``` + +1. By using `_bucket_count` instead of a metric name, we can filter out `histo` buckets that contain no buckets for the `categories` aggregation. + + + +## Dealing with dots in agg names [dots-in-agg-names] + +An alternate syntax is supported to cope with aggregations or metrics which have dots in the name, such as the `99.9`th [percentile](/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md). This metric may be referred to as: + +```js +"buckets_path": "my_percentile[99.9]" +``` + + +## Dealing with gaps in the data [gap-policy] + +Data in the real world is often noisy and sometimes contains **gaps**: places where data simply doesn’t exist. This can occur for a variety of reasons, the most common being: + +* Documents falling into a bucket do not contain a required field +* There are no documents matching the query for one or more buckets +* The metric being calculated is unable to generate a value, likely because another dependent bucket is missing a value. Some pipeline aggregations have specific requirements that must be met (e.g. a derivative cannot calculate a metric for the first value because there is no previous value, the HoltWinters moving average needs "warmup" data to begin calculating, etc.) + +Gap policies are a mechanism to inform the pipeline aggregation about the desired behavior when "gappy" or missing data is encountered. All pipeline aggregations accept the `gap_policy` parameter. There are currently three gap policies to choose from (a short usage sketch follows this list): + +*skip* +: This option treats missing data as if the bucket does not exist. It will skip the bucket and continue calculating using the next available value. + +*insert_zeros* +: This option will replace missing values with a zero (`0`) and pipeline aggregation computation will proceed as normal. + +*keep_values* +: This option is similar to skip, except if the metric provides a non-null, non-NaN value this value is used, otherwise the empty bucket is skipped.
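As a short usage sketch (adapting the earlier derivative example to an `avg` metric, which produces no value for empty buckets), `gap_policy` is set directly on the pipeline aggregation:

```console
POST /_search
{
  "aggs": {
    "my_date_histo": {
      "date_histogram": {
        "field": "timestamp",
        "calendar_interval": "day"
      },
      "aggs": {
        "the_avg": {
          "avg": { "field": "lemmings" }
        },
        "the_deriv": {
          "derivative": {
            "buckets_path": "the_avg",
            "gap_policy": "insert_zeros" <1>
          }
        }
      }
    }
  }
}
```

1. With the default `skip`, days where `the_avg` has no value would be skipped; `insert_zeros` substitutes `0` so the derivative is still calculated for the following buckets.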
+ + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-adjacency-matrix-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-adjacency-matrix-aggregation.md new file mode 100644 index 0000000000000..eb1bc60441714 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-adjacency-matrix-aggregation.md @@ -0,0 +1,129 @@ +--- +navigation_title: "Adjacency matrix" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-adjacency-matrix-aggregation.html +--- + +# Adjacency matrix aggregation [search-aggregations-bucket-adjacency-matrix-aggregation] + + +A bucket aggregation returning a form of [adjacency matrix](https://en.wikipedia.org/wiki/Adjacency_matrix). The request provides a collection of named filter expressions, similar to the `filters` aggregation request. Each bucket in the response represents a non-empty cell in the matrix of intersecting filters. + +Given filters named `A`, `B` and `C` the response would return buckets with the following names: + +| | A | B | C | +| --- | --- | --- | --- | +| A | A | A&B | A&C | +| B | | B | B&C | +| C | | | C | + +The intersecting buckets e.g `A&C` are labelled using a combination of the two filter names with a default separator of `&`. Note that the response does not also include a `C&A` bucket as this would be the same set of documents as `A&C`. The matrix is said to be *symmetric* so we only return half of it. To do this we sort the filter name strings and always use the lowest of a pair as the value to the left of the separator. + +## Example [adjacency-matrix-agg-ex] + +The following `interactions` aggregation uses `adjacency_matrix` to determine which groups of individuals exchanged emails. + +$$$adjacency-matrix-aggregation-example$$$ + +```console +PUT emails/_bulk?refresh +{ "index" : { "_id" : 1 } } +{ "accounts" : ["hillary", "sidney"]} +{ "index" : { "_id" : 2 } } +{ "accounts" : ["hillary", "donald"]} +{ "index" : { "_id" : 3 } } +{ "accounts" : ["vladimir", "donald"]} + +GET emails/_search +{ + "size": 0, + "aggs" : { + "interactions" : { + "adjacency_matrix" : { + "filters" : { + "grpA" : { "terms" : { "accounts" : ["hillary", "sidney"] }}, + "grpB" : { "terms" : { "accounts" : ["donald", "mitt"] }}, + "grpC" : { "terms" : { "accounts" : ["vladimir", "nigel"] }} + } + } + } + } +} +``` + +The response contains buckets with document counts for each filter and combination of filters. Buckets with no matching documents are excluded from the response. + +```console-result +{ + "took": 9, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "interactions": { + "buckets": [ + { + "key":"grpA", + "doc_count": 2 + }, + { + "key":"grpA&grpB", + "doc_count": 1 + }, + { + "key":"grpB", + "doc_count": 2 + }, + { + "key":"grpB&grpC", + "doc_count": 1 + }, + { + "key":"grpC", + "doc_count": 1 + } + ] + } + } +} +``` + + +## Parameters [adjacency-matrix-agg-params] + +`filters` +: (Required, object) Filters used to create buckets. + + ::::{dropdown} Properties of `filters` + `` + : (Required, [Query DSL object](/reference/query-languages/querydsl.md)) Query used to filter documents. The key is the filter name. + + At least one filter is required. 
The total number of filters cannot exceed the [`indices.query.bool.max_clause_count`](/reference/elasticsearch/configuration-reference/search-settings.md#indices-query-bool-max-clause-count) setting. See [Filter limits](#adjacency-matrix-agg-filter-limits). + + + :::: + + +`separator` +: (Optional, string) Separator used to concatenate filter names. Defaults to `&`. + + +## Response body [adjacency-matrix-agg-response] + +`key` +: (string) Filters for the bucket. If the bucket uses multiple filters, filter names are concatenated using a `separator`. + +`doc_count` +: (integer) Number of documents matching the bucket’s filters. + + +## Usage [adjacency-matrix-agg-usage] + +On its own this aggregation can provide all of the data required to create an undirected weighted graph. However, when used with child aggregations such as a `date_histogram` the results can provide the additional levels of data required to perform [dynamic network analysis](https://en.wikipedia.org/wiki/Dynamic_network_analysis) where examining interactions *over time* becomes important. + + +## Filter limits [adjacency-matrix-agg-filter-limits] + +For N filters the matrix of buckets produced can be N²/2 which can be costly. The circuit breaker settings prevent results producing too many buckets and to avoid excessive disk seeks the `indices.query.bool.max_clause_count` setting is used to limit the number of filters. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-autodatehistogram-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-autodatehistogram-aggregation.md new file mode 100644 index 0000000000000..67ffd7b0d6291 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-autodatehistogram-aggregation.md @@ -0,0 +1,297 @@ +--- +navigation_title: "Auto-interval date histogram" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-autodatehistogram-aggregation.html +--- + +# Auto-interval date histogram aggregation [search-aggregations-bucket-autodatehistogram-aggregation] + + +A multi-bucket aggregation similar to the [Date histogram](/reference/data-analysis/aggregations/search-aggregations-bucket-datehistogram-aggregation.md) except instead of providing an interval to use as the width of each bucket, a target number of buckets is provided indicating the number of buckets needed and the interval of the buckets is automatically chosen to best achieve that target. The number of buckets returned will always be less than or equal to this target number. + +The buckets field is optional, and will default to 10 buckets if not specified. + +Requesting a target of 10 buckets. + +$$$autodatehistogram-aggregation-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "sales_over_time": { + "auto_date_histogram": { + "field": "date", + "buckets": 10 + } + } + } +} +``` + +## Keys [_keys] + +Internally, a date is represented as a 64 bit number representing a timestamp in milliseconds-since-the-epoch. These timestamps are returned as the bucket `key`s. The `key_as_string` is the same timestamp converted to a formatted date string using the format specified with the `format` parameter: + +::::{tip} +If no `format` is specified, then it will use the first date [format](/reference/elasticsearch/mapping-reference/mapping-date-format.md) specified in the field mapping. 
+:::: + + +$$$autodatehistogram-aggregation-format-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "sales_over_time": { + "auto_date_histogram": { + "field": "date", + "buckets": 5, + "format": "yyyy-MM-dd" <1> + } + } + } +} +``` + +1. Supports expressive date [format pattern](/reference/data-analysis/aggregations/search-aggregations-bucket-daterange-aggregation.md#date-format-pattern) + + +Response: + +```console-result +{ + ... + "aggregations": { + "sales_over_time": { + "buckets": [ + { + "key_as_string": "2015-01-01", + "key": 1420070400000, + "doc_count": 3 + }, + { + "key_as_string": "2015-02-01", + "key": 1422748800000, + "doc_count": 2 + }, + { + "key_as_string": "2015-03-01", + "key": 1425168000000, + "doc_count": 2 + } + ], + "interval": "1M" + } + } +} +``` + + +## Intervals [_intervals] + +The interval of the returned buckets is selected based on the data collected by the aggregation so that the number of buckets returned is less than or equal to the number requested. The possible intervals returned are: + +seconds +: In multiples of 1, 5, 10 and 30 + +minutes +: In multiples of 1, 5, 10 and 30 + +hours +: In multiples of 1, 3 and 12 + +days +: In multiples of 1, and 7 + +months +: In multiples of 1, and 3 + +years +: In multiples of 1, 5, 10, 20, 50 and 100 + +In the worst case, where the number of daily buckets are too many for the requested number of buckets, the number of buckets returned will be 1/7th of the number of buckets requested. + + +## Time Zone [_time_zone] + +Date-times are stored in Elasticsearch in UTC. By default, all bucketing and rounding is also done in UTC. The `time_zone` parameter can be used to indicate that bucketing should use a different time zone. + +Time zones may either be specified as an ISO 8601 UTC offset (e.g. `+01:00` or `-08:00`) or as a timezone id, an identifier used in the TZ database like `America/Los_Angeles`. + +Consider the following example: + +$$$autodatehistogram-aggregation-timezone-example$$$ + +```console +PUT my-index-000001/_doc/1?refresh +{ + "date": "2015-10-01T00:30:00Z" +} + +PUT my-index-000001/_doc/2?refresh +{ + "date": "2015-10-01T01:30:00Z" +} + +PUT my-index-000001/_doc/3?refresh +{ + "date": "2015-10-01T02:30:00Z" +} + +GET my-index-000001/_search?size=0 +{ + "aggs": { + "by_day": { + "auto_date_histogram": { + "field": "date", + "buckets" : 3 + } + } + } +} +``` + +UTC is used if no time zone is specified, three 1-hour buckets are returned starting at midnight UTC on 1 October 2015: + +```console-result +{ + ... + "aggregations": { + "by_day": { + "buckets": [ + { + "key_as_string": "2015-10-01T00:00:00.000Z", + "key": 1443657600000, + "doc_count": 1 + }, + { + "key_as_string": "2015-10-01T01:00:00.000Z", + "key": 1443661200000, + "doc_count": 1 + }, + { + "key_as_string": "2015-10-01T02:00:00.000Z", + "key": 1443664800000, + "doc_count": 1 + } + ], + "interval": "1h" + } + } +} +``` + +If a `time_zone` of `-01:00` is specified, then midnight starts at one hour before midnight UTC: + +```console +GET my-index-000001/_search?size=0 +{ + "aggs": { + "by_day": { + "auto_date_histogram": { + "field": "date", + "buckets" : 3, + "time_zone": "-01:00" + } + } + } +} +``` + +Now three 1-hour buckets are still returned but the first bucket starts at 11:00pm on 30 September 2015 since that is the local time for the bucket in the specified time zone. + +```console-result +{ + ... 
+ "aggregations": { + "by_day": { + "buckets": [ + { + "key_as_string": "2015-09-30T23:00:00.000-01:00", <1> + "key": 1443657600000, + "doc_count": 1 + }, + { + "key_as_string": "2015-10-01T00:00:00.000-01:00", + "key": 1443661200000, + "doc_count": 1 + }, + { + "key_as_string": "2015-10-01T01:00:00.000-01:00", + "key": 1443664800000, + "doc_count": 1 + } + ], + "interval": "1h" + } + } +} +``` + +1. The `key_as_string` value represents midnight on each day in the specified time zone. + + +::::{warning} +When using time zones that follow DST (daylight savings time) changes, buckets close to the moment when those changes happen can have slightly different sizes than neighbouring buckets. For example, consider a DST start in the `CET` time zone: on 27 March 2016 at 2am, clocks were turned forward 1 hour to 3am local time. If the result of the aggregation was daily buckets, the bucket covering that day will only hold data for 23 hours instead of the usual 24 hours for other buckets. The same is true for shorter intervals like e.g. 12h. Here, we will have only a 11h bucket on the morning of 27 March when the DST shift happens. +:::: + + + +## Minimum Interval parameter [_minimum_interval_parameter] + +The `minimum_interval` allows the caller to specify the minimum rounding interval that should be used. This can make the collection process more efficient, as the aggregation will not attempt to round at any interval lower than `minimum_interval`. + +The accepted units for `minimum_interval` are: + +* year +* month +* day +* hour +* minute +* second + +$$$autodatehistogram-aggregation-minimum-interval-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "sale_date": { + "auto_date_histogram": { + "field": "date", + "buckets": 10, + "minimum_interval": "minute" + } + } + } +} +``` + + +## Missing value [_missing_value] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +$$$autodatehistogram-aggregation-missing-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "sale_date": { + "auto_date_histogram": { + "field": "date", + "buckets": 10, + "missing": "2000/01/01" <1> + } + } + } +} +``` + +1. Documents without a value in the `publish_date` field will fall into the same bucket as documents that have the value `2000-01-01`. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-categorize-text-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-categorize-text-aggregation.md new file mode 100644 index 0000000000000..4f785a342da34 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-categorize-text-aggregation.md @@ -0,0 +1,453 @@ +--- +navigation_title: "Categorize text" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-categorize-text-aggregation.html +--- + +# Categorize text aggregation [search-aggregations-bucket-categorize-text-aggregation] + + +A multi-bucket aggregation that groups semi-structured text into buckets. Each `text` field is re-analyzed using a custom analyzer. The resulting tokens are then categorized creating buckets of similarly formatted text values. This aggregation works best with machine generated text like system logs. Only the first 100 analyzed tokens are used to categorize the text. 
+ +::::{note} +If you have considerable memory allocated to your JVM but are receiving circuit breaker exceptions from this aggregation, you may be attempting to categorize text that is poorly formatted for categorization. Consider adding `categorization_filters` or running under [sampler](/reference/data-analysis/aggregations/search-aggregations-bucket-sampler-aggregation.md), [diversified sampler](/reference/data-analysis/aggregations/search-aggregations-bucket-diversified-sampler-aggregation.md), or [random sampler](/reference/data-analysis/aggregations/search-aggregations-random-sampler-aggregation.md) to explore the created categories. +:::: + + +::::{note} +The algorithm used for categorization was completely changed in version 8.3.0. As a result this aggregation will not work in a mixed version cluster where some nodes are on version 8.3.0 or higher and others are on a version older than 8.3.0. Upgrade all nodes in your cluster to the same version if you experience an error related to this change. +:::: + + +## Parameters [bucket-categorize-text-agg-syntax] + +`categorization_analyzer` +: (Optional, object or string) The categorization analyzer specifies how the text is analyzed and tokenized before being categorized. The syntax is very similar to that used to define the `analyzer` in the [Analyze endpoint](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze). This property cannot be used at the same time as `categorization_filters`. + + The `categorization_analyzer` field can be specified either as a string or as an object. If it is a string it must refer to a [built-in analyzer](/reference/data-analysis/text-analysis/analyzer-reference.md) or one added by another plugin. If it is an object it has the following properties: + + :::::{dropdown} Properties of `categorization_analyzer` + `char_filter` + : (array of strings or objects) One or more [character filters](/reference/data-analysis/text-analysis/character-filter-reference.md). In addition to the built-in character filters, other plugins can provide more character filters. This property is optional. If it is not specified, no character filters are applied prior to categorization. If you are customizing some other aspect of the analyzer and you need to achieve the equivalent of `categorization_filters` (which are not permitted when some other aspect of the analyzer is customized), add them here as [pattern replace character filters](/reference/data-analysis/text-analysis/analysis-pattern-replace-charfilter.md). + + `tokenizer` + : (string or object) The name or definition of the [tokenizer](/reference/data-analysis/text-analysis/tokenizer-reference.md) to use after character filters are applied. This property is compulsory if `categorization_analyzer` is specified as an object. Machine learning provides a tokenizer called `ml_standard` that tokenizes in a way that has been determined to produce good categorization results on a variety of log file formats for logs in English. If you want to use that tokenizer but change the character or token filters, specify `"tokenizer": "ml_standard"` in your `categorization_analyzer`. Additionally, the `ml_classic` tokenizer is available, which tokenizes in the same way as the non-customizable tokenizer in old versions of the product (before 6.2). 
`ml_classic` was the default categorization tokenizer in versions 6.2 to 7.13, so if you need categorization identical to the default for jobs created in these versions, specify `"tokenizer": "ml_classic"` in your `categorization_analyzer`. + + ::::{note} + From {{es}} 8.10.0, a new version number is used to track the configuration and state changes in the {{ml}} plugin. This new version number is decoupled from the product version and will increment independently. + :::: + + + `filter` + : (array of strings or objects) One or more [token filters](/reference/data-analysis/text-analysis/token-filter-reference.md). In addition to the built-in token filters, other plugins can provide more token filters. This property is optional. If it is not specified, no token filters are applied prior to categorization. + + ::::: + + +`categorization_filters` +: (Optional, array of strings) This property expects an array of regular expressions. The expressions are used to filter out matching sequences from the categorization field values. You can use this functionality to fine tune the categorization by excluding sequences from consideration when categories are defined. For example, you can exclude SQL statements that appear in your log files. This property cannot be used at the same time as `categorization_analyzer`. If you only want to define simple regular expression filters that are applied prior to tokenization, setting this property is the easiest method. If you also want to customize the tokenizer or post-tokenization filtering, use the `categorization_analyzer` property instead and include the filters as `pattern_replace` character filters. + +`field` +: (Required, string) The semi-structured text field to categorize. + +`max_matched_tokens` +: (Optional, integer) This parameter does nothing now, but is permitted for compatibility with the original pre-8.3.0 implementation. + +`max_unique_tokens` +: (Optional, integer) This parameter does nothing now, but is permitted for compatibility with the original pre-8.3.0 implementation. + +`min_doc_count` +: (Optional, integer) The minimum number of documents for a bucket to be returned to the results. + +`shard_min_doc_count` +: (Optional, integer) The minimum number of documents for a bucket to be returned from the shard before merging. + +`shard_size` +: (Optional, integer) The number of categorization buckets to return from each shard before merging all the results. + +`similarity_threshold` +: (Optional, integer, default: `70`) The minimum percentage of token weight that must match for text to be added to the category bucket. Must be between 1 and 100. The larger the value the narrower the categories. Larger values will increase memory usage and create narrower categories. + +`size` +: (Optional, integer, default: `10`) The number of buckets to return. + + +## Response body [bucket-categorize-text-agg-response] + +`key` +: (string) Consists of the tokens (extracted by the `categorization_analyzer`) that are common to all values of the input field included in the category. + +`doc_count` +: (integer) Number of documents matching the category. + +`max_matching_length` +: (integer) Categories from short messages containing few tokens may also match categories containing many tokens derived from much longer messages. `max_matching_length` is an indication of the maximum length of messages that should be considered to belong to the category. 
When searching for messages that match the category, any messages longer than `max_matching_length` should be excluded. Use this field to prevent a search for members of a category of short messages from matching much longer ones. + +`regex` +: (string) A regular expression that will match all values of the input field included in the category. It is possible that the `regex` does not incorporate every term in `key`, if ordering varies between the values included in the category. However, in simple cases the `regex` will be the ordered terms concatenated into a regular expression that allows for arbitrary sections in between them. It is not recommended to use the `regex` as the primary mechanism for searching for the original documents that were categorized. Search using a regular expression is very slow. Instead the terms in the `key` field should be used to search for matching documents, as a terms search can use the inverted index and hence be much faster. However, there may be situations where it is useful to use the `regex` field to test whether a small set of messages that have not been indexed match the category, or to confirm that the terms in the `key` occur in the correct order in all the matched documents. + + +## Basic use [_basic_use] + +::::{warning} +Re-analyzing *large* result sets will require a lot of time and memory. This aggregation should be used in conjunction with [Async search](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-async-search-submit). Additionally, you may consider using the aggregation as a child of either the [sampler](/reference/data-analysis/aggregations/search-aggregations-bucket-sampler-aggregation.md) or [diversified sampler](/reference/data-analysis/aggregations/search-aggregations-bucket-diversified-sampler-aggregation.md) aggregation. This will typically improve speed and memory use. +:::: + + +Example: + +```console +POST log-messages/_search?filter_path=aggregations +{ + "aggs": { + "categories": { + "categorize_text": { + "field": "message" + } + } + } +} +``` + +Response: + +```console-result +{ + "aggregations" : { + "categories" : { + "buckets" : [ + { + "doc_count" : 3, + "key" : "Node shutting down", + "regex" : ".*?Node.+?shutting.+?down.*?", + "max_matching_length" : 49 + }, + { + "doc_count" : 1, + "key" : "Node starting up", + "regex" : ".*?Node.+?starting.+?up.*?", + "max_matching_length" : 47 + }, + { + "doc_count" : 1, + "key" : "User foo_325 logging on", + "regex" : ".*?User.+?foo_325.+?logging.+?on.*?", + "max_matching_length" : 52 + }, + { + "doc_count" : 1, + "key" : "User foo_864 logged off", + "regex" : ".*?User.+?foo_864.+?logged.+?off.*?", + "max_matching_length" : 52 + } + ] + } + } +} +``` + +Here is an example using `categorization_filters` + +```console +POST log-messages/_search?filter_path=aggregations +{ + "aggs": { + "categories": { + "categorize_text": { + "field": "message", + "categorization_filters": ["\\w+\\_\\d{3}"] <1> + } + } + } +} +``` + +1. The filters to apply to the analyzed tokens. It filters out tokens like `bar_123`. 
+ + +Note how the `foo_` tokens are not part of the category results + +```console-result +{ + "aggregations" : { + "categories" : { + "buckets" : [ + { + "doc_count" : 3, + "key" : "Node shutting down", + "regex" : ".*?Node.+?shutting.+?down.*?", + "max_matching_length" : 49 + }, + { + "doc_count" : 1, + "key" : "Node starting up", + "regex" : ".*?Node.+?starting.+?up.*?", + "max_matching_length" : 47 + }, + { + "doc_count" : 1, + "key" : "User logged off", + "regex" : ".*?User.+?logged.+?off.*?", + "max_matching_length" : 52 + }, + { + "doc_count" : 1, + "key" : "User logging on", + "regex" : ".*?User.+?logging.+?on.*?", + "max_matching_length" : 52 + } + ] + } + } +} +``` + +Here is an example using `categorization_filters`. The default analyzer uses the `ml_standard` tokenizer which is similar to a whitespace tokenizer but filters out tokens that could be interpreted as hexadecimal numbers. The default analyzer also uses the `first_line_with_letters` character filter, so that only the first meaningful line of multi-line messages is considered. But, it may be that a token is a known highly-variable token (formatted usernames, emails, etc.). In that case, it is good to supply custom `categorization_filters` to filter out those tokens for better categories. These filters may also reduce memory usage as fewer tokens are held in memory for the categories. (If there are sufficient examples of different usernames, emails, etc., then categories will form that naturally discard them as variables, but for small input data where only one example exists this won’t happen.) + +```console +POST log-messages/_search?filter_path=aggregations +{ + "aggs": { + "categories": { + "categorize_text": { + "field": "message", + "categorization_filters": ["\\w+\\_\\d{3}"], <1> + "similarity_threshold": 11 <2> + } + } + } +} +``` + +1. The filters to apply to the analyzed tokens. It filters out tokens like `bar_123`. +2. Require 11% of token weight to match before adding a message to an existing category rather than creating a new one. + + +The resulting categories are now very broad, merging the log groups. (A `similarity_threshold` of 11% is generally too low. Settings over 50% are usually better.) + +```console-result +{ + "aggregations" : { + "categories" : { + "buckets" : [ + { + "doc_count" : 4, + "key" : "Node", + "regex" : ".*?Node.*?", + "max_matching_length" : 49 + }, + { + "doc_count" : 2, + "key" : "User", + "regex" : ".*?User.*?", + "max_matching_length" : 52 + } + ] + } + } +} +``` + +This aggregation can have both sub-aggregations and itself be a sub-aggregation. This allows gathering the top daily categories and the top sample doc as below. 
+ +```console +POST log-messages/_search?filter_path=aggregations +{ + "aggs": { + "daily": { + "date_histogram": { + "field": "time", + "fixed_interval": "1d" + }, + "aggs": { + "categories": { + "categorize_text": { + "field": "message", + "categorization_filters": ["\\w+\\_\\d{3}"] + }, + "aggs": { + "hit": { + "top_hits": { + "size": 1, + "sort": ["time"], + "_source": "message" + } + } + } + } + } + } + } +} +``` + +```console-result +{ + "aggregations" : { + "daily" : { + "buckets" : [ + { + "key_as_string" : "2016-02-07T00:00:00.000Z", + "key" : 1454803200000, + "doc_count" : 3, + "categories" : { + "buckets" : [ + { + "doc_count" : 2, + "key" : "Node shutting down", + "regex" : ".*?Node.+?shutting.+?down.*?", + "max_matching_length" : 49, + "hit" : { + "hits" : { + "total" : { + "value" : 2, + "relation" : "eq" + }, + "max_score" : null, + "hits" : [ + { + "_index" : "log-messages", + "_id" : "1", + "_score" : null, + "_source" : { + "message" : "2016-02-07T00:00:00+0000 Node 3 shutting down" + }, + "sort" : [ + 1454803260000 + ] + } + ] + } + } + }, + { + "doc_count" : 1, + "key" : "Node starting up", + "regex" : ".*?Node.+?starting.+?up.*?", + "max_matching_length" : 47, + "hit" : { + "hits" : { + "total" : { + "value" : 1, + "relation" : "eq" + }, + "max_score" : null, + "hits" : [ + { + "_index" : "log-messages", + "_id" : "2", + "_score" : null, + "_source" : { + "message" : "2016-02-07T00:00:00+0000 Node 5 starting up" + }, + "sort" : [ + 1454803320000 + ] + } + ] + } + } + } + ] + } + }, + { + "key_as_string" : "2016-02-08T00:00:00.000Z", + "key" : 1454889600000, + "doc_count" : 3, + "categories" : { + "buckets" : [ + { + "doc_count" : 1, + "key" : "Node shutting down", + "regex" : ".*?Node.+?shutting.+?down.*?", + "max_matching_length" : 49, + "hit" : { + "hits" : { + "total" : { + "value" : 1, + "relation" : "eq" + }, + "max_score" : null, + "hits" : [ + { + "_index" : "log-messages", + "_id" : "4", + "_score" : null, + "_source" : { + "message" : "2016-02-08T00:00:00+0000 Node 5 shutting down" + }, + "sort" : [ + 1454889660000 + ] + } + ] + } + } + }, + { + "doc_count" : 1, + "key" : "User logged off", + "regex" : ".*?User.+?logged.+?off.*?", + "max_matching_length" : 52, + "hit" : { + "hits" : { + "total" : { + "value" : 1, + "relation" : "eq" + }, + "max_score" : null, + "hits" : [ + { + "_index" : "log-messages", + "_id" : "6", + "_score" : null, + "_source" : { + "message" : "2016-02-08T00:00:00+0000 User foo_864 logged off" + }, + "sort" : [ + 1454889840000 + ] + } + ] + } + } + }, + { + "doc_count" : 1, + "key" : "User logging on", + "regex" : ".*?User.+?logging.+?on.*?", + "max_matching_length" : 52, + "hit" : { + "hits" : { + "total" : { + "value" : 1, + "relation" : "eq" + }, + "max_score" : null, + "hits" : [ + { + "_index" : "log-messages", + "_id" : "5", + "_score" : null, + "_source" : { + "message" : "2016-02-08T00:00:00+0000 User foo_325 logging on" + }, + "sort" : [ + 1454889720000 + ] + } + ] + } + } + } + ] + } + } + ] + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-children-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-children-aggregation.md new file mode 100644 index 0000000000000..9d860fa8873e2 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-children-aggregation.md @@ -0,0 +1,223 @@ +--- +navigation_title: "Children" +mapped_pages: + - 
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-children-aggregation.html +--- + +# Children aggregation [search-aggregations-bucket-children-aggregation] + + +A special single bucket aggregation that selects child documents that have the specified type, as defined in a [`join` field](/reference/elasticsearch/mapping-reference/parent-join.md). + +This aggregation has a single option: + +* `type` - The child type that should be selected. + +For example, let’s say we have an index of questions and answers. The answer type has the following `join` field in the mapping: + +$$$children-aggregation-example$$$ + +```console +PUT child_example +{ + "mappings": { + "properties": { + "join": { + "type": "join", + "relations": { + "question": "answer" + } + } + } + } +} +``` + +The `question` document contain a tag field and the `answer` documents contain an owner field. With the `children` aggregation the tag buckets can be mapped to the owner buckets in a single request even though the two fields exist in two different kinds of documents. + +An example of a question document: + +```console +PUT child_example/_doc/1 +{ + "join": { + "name": "question" + }, + "body": "
I have Windows 2003 server and i bought a new Windows 2008 server...", + "title": "Whats the best way to file transfer my site from server to a newer one?", + "tags": [ + "windows-server-2003", + "windows-server-2008", + "file-transfer" + ] +} +``` + +Examples of `answer` documents: + +```console +PUT child_example/_doc/2?routing=1 +{ + "join": { + "name": "answer", + "parent": "1" + }, + "owner": { + "location": "Norfolk, United Kingdom", + "display_name": "Sam", + "id": 48 + }, + "body": "
Unfortunately you're pretty much limited to FTP...", + "creation_date": "2009-05-04T13:45:37.030" +} + +PUT child_example/_doc/3?routing=1&refresh +{ + "join": { + "name": "answer", + "parent": "1" + }, + "owner": { + "location": "Norfolk, United Kingdom", + "display_name": "Troll", + "id": 49 + }, + "body": "
Use Linux...", + "creation_date": "2009-05-05T13:45:37.030" +} +``` + +The following request can be built that connects the two together: + +```console +POST child_example/_search?size=0 +{ + "aggs": { + "top-tags": { + "terms": { + "field": "tags.keyword", + "size": 10 + }, + "aggs": { + "to-answers": { + "children": { + "type" : "answer" <1> + }, + "aggs": { + "top-names": { + "terms": { + "field": "owner.display_name.keyword", + "size": 10 + } + } + } + } + } + } + } +} +``` + +1. The `type` points to type / mapping with the name `answer`. + + +The above example returns the top question tags and per tag the top answer owners. + +Possible response: + +```console-result +{ + "took": 25, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped" : 0, + "failed": 0 + }, + "hits": { + "total" : { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "top-tags": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "file-transfer", + "doc_count": 1, <1> + "to-answers": { + "doc_count": 2, <2> + "top-names": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "Sam", + "doc_count": 1 + }, + { + "key": "Troll", + "doc_count": 1 + } + ] + } + } + }, + { + "key": "windows-server-2003", + "doc_count": 1, <1> + "to-answers": { + "doc_count": 2, <2> + "top-names": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "Sam", + "doc_count": 1 + }, + { + "key": "Troll", + "doc_count": 1 + } + ] + } + } + }, + { + "key": "windows-server-2008", + "doc_count": 1, <1> + "to-answers": { + "doc_count": 2, <2> + "top-names": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "Sam", + "doc_count": 1 + }, + { + "key": "Troll", + "doc_count": 1 + } + ] + } + } + } + ] + } + } +} +``` + +1. The number of question documents with the tag `file-transfer`, `windows-server-2003`, etc. +2. The number of answer documents that are related to question documents with the tag `file-transfer`, `windows-server-2003`, etc. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-composite-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-composite-aggregation.md new file mode 100644 index 0000000000000..cd477c51603c7 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-composite-aggregation.md @@ -0,0 +1,776 @@ +--- +navigation_title: "Composite" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-composite-aggregation.html +--- + +# Composite aggregation [search-aggregations-bucket-composite-aggregation] + + +::::{warning} +The composite aggregation is expensive. Load test your application before deploying a composite aggregation in production. +:::: + + +A multi-bucket aggregation that creates composite buckets from different sources. + +Unlike the other `multi-bucket` aggregations, you can use the `composite` aggregation to paginate **all** buckets from a multi-level aggregation efficiently. This aggregation provides a way to stream **all** buckets of a specific aggregation, similar to what [scroll](/reference/elasticsearch/rest-apis/paginate-search-results.md#scroll-search-results) does for documents. 
+ +The composite buckets are built from the combinations of the values extracted/created for each document and each combination is considered as a composite bucket. + +For example, consider the following document: + +```js +{ + "keyword": ["foo", "bar"], + "number": [23, 65, 76] +} +``` + +Using `keyword` and `number` as source fields for the aggregation results in the following composite buckets: + +```js +{ "keyword": "foo", "number": 23 } +{ "keyword": "foo", "number": 65 } +{ "keyword": "foo", "number": 76 } +{ "keyword": "bar", "number": 23 } +{ "keyword": "bar", "number": 65 } +{ "keyword": "bar", "number": 76 } +``` + +## Value sources [_value_sources] + +The `sources` parameter defines the source fields to use when building composite buckets. The order that the `sources` are defined controls the order that the keys are returned. + +::::{note} +You must use a unique name when defining `sources`. +:::: + + +The `sources` parameter can be any of the following types: + +* [Terms](#_terms) +* [Histogram](#_histogram) +* [Date histogram](#_date_histogram) +* [GeoTile grid](#_geotile_grid) + +### Terms [_terms] + +The `terms` value source is similar to a simple `terms` aggregation. The values are extracted from a field exactly like the `terms` aggregation. + +Example: + +$$$composite-aggregation-terms-field-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "product": { "terms": { "field": "product" } } } + ] + } + } + } +} +``` + +Like the `terms` aggregation, it’s possible to use a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md) to create values for the composite buckets: + +$$$composite-aggregation-terms-runtime-field-example$$$ + +```console +GET /_search +{ + "runtime_mappings": { + "day_of_week": { + "type": "keyword", + "script": """ + emit(doc['timestamp'].value.dayOfWeekEnum + .getDisplayName(TextStyle.FULL, Locale.ENGLISH)) + """ + } + }, + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { + "dow": { + "terms": { "field": "day_of_week" } + } + } + ] + } + } + } +} +``` + +Although similar, the `terms` value source doesn’t support the same set of parameters as the `terms` aggregation. For other supported value source parameters, see: + +* [Order](#_order) +* [Missing bucket](#_missing_bucket) + + +### Histogram [_histogram] + +The `histogram` value source can be applied on numeric values to build fixed size interval over the values. The `interval` parameter defines how the numeric values should be transformed. For instance an `interval` set to 5 will translate any numeric values to its closest interval, a value of `101` would be translated to `100` which is the key for the interval between 100 and 105. 
+ +Example: + +$$$composite-aggregation-histogram-field-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "histo": { "histogram": { "field": "price", "interval": 5 } } } + ] + } + } + } +} +``` + +Like the `histogram` aggregation it’s possible to use a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md) to create values for the composite buckets: + +$$$composite-aggregation-histogram-runtime-field-example$$$ + +```console +GET /_search +{ + "runtime_mappings": { + "price.discounted": { + "type": "double", + "script": """ + double price = doc['price'].value; + if (doc['product'].value == 'mad max') { + price *= 0.8; + } + emit(price); + """ + } + }, + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { + "price": { + "histogram": { + "interval": 5, + "field": "price.discounted" + } + } + } + ] + } + } + } +} +``` + + +### Date histogram [_date_histogram] + +The `date_histogram` is similar to the `histogram` value source except that the interval is specified by date/time expression: + +$$$composite-aggregation-datehistogram-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d" } } } + ] + } + } + } +} +``` + +The example above creates an interval per day and translates all `timestamp` values to the start of its closest intervals. Available expressions for interval: `year`, `quarter`, `month`, `week`, `day`, `hour`, `minute`, `second` + +Time values can also be specified via abbreviations supported by [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units) parsing. Note that fractional time values are not supported, but you can address this by shifting to another time unit (e.g., `1.5h` could instead be specified as `90m`). + +**Format** + +Internally, a date is represented as a 64 bit number representing a timestamp in milliseconds-since-the-epoch. These timestamps are returned as the bucket keys. It is possible to return a formatted date string instead using the format specified with the format parameter: + +$$$composite-aggregation-datehistogram-format-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { + "date": { + "date_histogram": { + "field": "timestamp", + "calendar_interval": "1d", + "format": "yyyy-MM-dd" <1> + } + } + } + ] + } + } + } +} +``` + +1. Supports expressive date [format pattern](/reference/data-analysis/aggregations/search-aggregations-bucket-daterange-aggregation.md#date-format-pattern) + + +**Time Zone** + +Date-times are stored in Elasticsearch in UTC. By default, all bucketing and rounding is also done in UTC. The `time_zone` parameter can be used to indicate that bucketing should use a different time zone. + +Time zones may either be specified as an ISO 8601 UTC offset (e.g. `+01:00` or `-08:00`) or as a timezone id, an identifier used in the TZ database like `America/Los_Angeles`. + +**Offset** + +Use the `offset` parameter to change the start value of each bucket by the specified positive (`+`) or negative offset (`-`) duration, such as `1h` for an hour, or `1d` for a day. See [Time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units) for more possible time duration options. + +For example, when using an interval of `day`, each bucket runs from midnight to midnight. 
Setting the `offset` parameter to `+6h` changes each bucket to run from 6am to 6am: + +$$$composite-aggregation-datehistogram-offset-example$$$ + +```console +PUT my-index-000001/_doc/1?refresh +{ + "date": "2015-10-01T05:30:00Z" +} + +PUT my-index-000001/_doc/2?refresh +{ + "date": "2015-10-01T06:30:00Z" +} + +GET my-index-000001/_search?size=0 +{ + "aggs": { + "my_buckets": { + "composite" : { + "sources" : [ + { + "date": { + "date_histogram" : { + "field": "date", + "calendar_interval": "day", + "offset": "+6h", + "format": "iso8601" + } + } + } + ] + } + } + } +} +``` + +Instead of a single bucket starting at midnight, the above request groups the documents into buckets starting at 6am: + +```console-result +{ + ... + "aggregations": { + "my_buckets": { + "after_key": { "date": "2015-10-01T06:00:00.000Z" }, + "buckets": [ + { + "key": { "date": "2015-09-30T06:00:00.000Z" }, + "doc_count": 1 + }, + { + "key": { "date": "2015-10-01T06:00:00.000Z" }, + "doc_count": 1 + } + ] + } + } +} +``` + +::::{note} +The start `offset` of each bucket is calculated after `time_zone` adjustments have been made. +:::: + + + +### GeoTile grid [_geotile_grid] + +The `geotile_grid` value source works on `geo_point` fields and groups points into buckets that represent cells in a grid. The resulting grid can be sparse and only contains cells that have matching data. Each cell corresponds to a [map tile](https://en.wikipedia.org/wiki/Tiled_web_map) as used by many online map sites. Each cell is labeled using a `"{{zoom}}/{x}/{{y}}"` format, where zoom is equal to the user-specified precision. + +$$$composite-aggregation-geotilegrid-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "tile": { "geotile_grid": { "field": "location", "precision": 8 } } } + ] + } + } + } +} +``` + +**Precision** + +The highest-precision geotile of length 29 produces cells that cover less than 10cm by 10cm of land. This precision is uniquely suited for composite aggregations as each tile does not have to be generated and loaded in memory. + +See [Zoom level documentation](https://wiki.openstreetmap.org/wiki/Zoom_levels) on how precision (zoom) correlates to size on the ground. Precision for this aggregation can be between 0 and 29, inclusive. + +**Bounding box filtering** + +The geotile source can optionally be constrained to a specific geo bounding box, which reduces the range of tiles used. These bounds are useful when only a specific part of a geographical area needs high precision tiling. + +$$$composite-aggregation-geotilegrid-boundingbox-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { + "tile": { + "geotile_grid": { + "field": "location", + "precision": 22, + "bounds": { + "top_left": "POINT (4.9 52.4)", + "bottom_right": "POINT (5.0 52.3)" + } + } + } + } + ] + } + } + } +} +``` + + +### Mixing different value sources [_mixing_different_value_sources] + +The `sources` parameter accepts an array of value sources. It is possible to mix different value sources to create composite buckets. 
For example: + +$$$composite-aggregation-mixing-sources-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d" } } }, + { "product": { "terms": { "field": "product" } } } + ] + } + } + } +} +``` + +This will create composite buckets from the values created by two value sources, a `date_histogram` and a `terms`. Each bucket is composed of two values, one for each value source defined in the aggregation. Any type of combinations is allowed and the order in the array is preserved in the composite buckets. + +$$$composite-aggregation-mixing-three-sources-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "shop": { "terms": { "field": "shop" } } }, + { "product": { "terms": { "field": "product" } } }, + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d" } } } + ] + } + } + } +} +``` + + + +## Order [_order] + +By default the composite buckets are sorted by their natural ordering. Values are sorted in ascending order of their values. When multiple value sources are requested, the ordering is done per value source, the first value of the composite bucket is compared to the first value of the other composite bucket and if they are equals the next values in the composite bucket are used for tie-breaking. This means that the composite bucket `[foo, 100]` is considered smaller than `[foobar, 0]` because `foo` is considered smaller than `foobar`. It is possible to define the direction of the sort for each value source by setting `order` to `asc` (default value) or `desc` (descending order) directly in the value source definition. For example: + +$$$composite-aggregation-order-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d", "order": "desc" } } }, + { "product": { "terms": { "field": "product", "order": "asc" } } } + ] + } + } + } +} +``` + +... will sort the composite bucket in descending order when comparing values from the `date_histogram` source and in ascending order when comparing values from the `terms` source. + + +## Missing bucket [_missing_bucket] + +By default documents without a value for a given source are ignored. It is possible to include them in the response by setting `missing_bucket` to `true` (defaults to `false`): + +$$$composite-aggregation-missing-bucket-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [{ + "product_name": { + "terms": { + "field": "product", + "missing_bucket": true, + "missing_order": "last" + } + } + }] + } + } + } +} +``` + +In the above example, the `product_name` source emits an explicit `null` bucket for documents without a `product` value. This bucket is placed last. + +You can control the position of the `null` bucket using the optional `missing_order` parameter. If `missing_order` is `first` or `last`, the `null` bucket is placed in the respective first or last position. If `missing_order` is omitted or `default`, the source’s `order` determines the bucket’s position. If `order` is `asc` (ascending), the bucket is in the first position. If `order` is `desc` (descending), the bucket is in the last position. 
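+For instance, here is a minimal sketch of the default placement (the field and aggregation names reuse the example above). With `missing_order` omitted, the position of the `null` bucket follows the source's `order`, so a descending order would be expected to place it last:
+
+```console
+GET /_search
+{
+  "size": 0,
+  "aggs": {
+    "my_buckets": {
+      "composite": {
+        "sources": [{
+          "product_name": {
+            "terms": {
+              "field": "product",
+              "order": "desc",
+              "missing_bucket": true <1>
+            }
+          }
+        }]
+      }
+    }
+  }
+}
+```
+
+1. No `missing_order` is set, so the `desc` order of the source also sends the `null` bucket to the last position.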
+ + +## Size [_size] + +The `size` parameter can be set to define how many composite buckets should be returned. Each composite bucket is considered as a single bucket, so setting a size of 10 will return the first 10 composite buckets created from the value sources. The response contains the values for each composite bucket in an array containing the values extracted from each value source. Defaults to `10`. + + +## Pagination [_pagination] + +If the number of composite buckets is too high (or unknown) to be returned in a single response it is possible to split the retrieval in multiple requests. Since the composite buckets are flat by nature, the requested `size` is exactly the number of composite buckets that will be returned in the response (assuming that they are at least `size` composite buckets to return). If all composite buckets should be retrieved it is preferable to use a small size (`100` or `1000` for instance) and then use the `after` parameter to retrieve the next results. For example: + +$$$composite-aggregation-after-key-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "size": 2, + "sources": [ + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d" } } }, + { "product": { "terms": { "field": "product" } } } + ] + } + } + } +} +``` + +... returns: + +```console-result +{ + ... + "aggregations": { + "my_buckets": { + "after_key": { + "date": 1494288000000, + "product": "mad max" + }, + "buckets": [ + { + "key": { + "date": 1494201600000, + "product": "rocky" + }, + "doc_count": 1 + }, + { + "key": { + "date": 1494288000000, + "product": "mad max" + }, + "doc_count": 2 + } + ] + } + } +} +``` + +To get the next set of buckets, resend the same aggregation with the `after` parameter set to the `after_key` value returned in the response. For example, this request uses the `after_key` value provided in the previous response: + +$$$composite-aggregation-after-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "size": 2, + "sources": [ + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d", "order": "desc" } } }, + { "product": { "terms": { "field": "product", "order": "asc" } } } + ], + "after": { "date": 1494288000000, "product": "mad max" } <1> + } + } + } +} +``` + +1. Should restrict the aggregation to buckets that sort **after** the provided values. + + +::::{note} +The `after_key` is **usually** the key to the last bucket returned in the response, but that isn’t guaranteed. Always use the returned `after_key` instead of deriving it from the buckets. +:::: + + + +## Early termination [_early_termination] + +For optimal performance the [index sort](/reference/elasticsearch/index-settings/sorting.md) should be set on the index so that it matches parts or fully the source order in the composite aggregation. For instance the following index sort: + +```console +PUT my-index-000001 +{ + "settings": { + "index": { + "sort.field": [ "username", "timestamp" ], <1> + "sort.order": [ "asc", "desc" ] <2> + } + }, + "mappings": { + "properties": { + "username": { + "type": "keyword", + "doc_values": true + }, + "timestamp": { + "type": "date" + } + } + } +} +``` + +1. This index is sorted by `username` first then by `timestamp`. +2. …​ in ascending order for the `username` field and in descending order for the `timestamp` field.1. 
could be used to optimize these composite aggregations: + + + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "user_name": { "terms": { "field": "user_name" } } } <1> + ] + } + } + } +} +``` + +1. `user_name` is a prefix of the index sort and the order matches (`asc`). + + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "user_name": { "terms": { "field": "user_name" } } }, <1> + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d", "order": "desc" } } } <2> + ] + } + } + } +} +``` + +1. `user_name` is a prefix of the index sort and the order matches (`asc`). +2. `timestamp` matches also the prefix and the order matches (`desc`). + + +In order to optimize the early termination it is advised to set `track_total_hits` in the request to `false`. The number of total hits that match the request can be retrieved on the first request and it would be costly to compute this number on every page: + +```console +GET /_search +{ + "size": 0, + "track_total_hits": false, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "user_name": { "terms": { "field": "user_name" } } }, + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d", "order": "desc" } } } + ] + } + } + } +} +``` + +Note that the order of the source is important, in the example below switching the `user_name` with the `timestamp` would deactivate the sort optimization since this configuration wouldn’t match the index sort specification. If the order of sources do not matter for your use case you can follow these simple guidelines: + +* Put the fields with the highest cardinality first. +* Make sure that the order of the field matches the order of the index sort. +* Put multi-valued fields last since they cannot be used for early termination. + +::::{warning} +[index sort](/reference/elasticsearch/index-settings/sorting.md) can slowdown indexing, it is very important to test index sorting with your specific use case and dataset to ensure that it matches your requirement. If it doesn’t note that `composite` aggregations will also try to early terminate on non-sorted indices if the query matches all document (`match_all` query). +:::: + + + +## Sub-aggregations [_sub_aggregations] + +Like any `multi-bucket` aggregations the `composite` aggregation can hold sub-aggregations. These sub-aggregations can be used to compute other buckets or statistics on each composite bucket created by this parent aggregation. For instance the following example computes the average value of a field per composite bucket: + +$$$composite-aggregation-subaggregations-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "my_buckets": { + "composite": { + "sources": [ + { "date": { "date_histogram": { "field": "timestamp", "calendar_interval": "1d", "order": "desc" } } }, + { "product": { "terms": { "field": "product" } } } + ] + }, + "aggregations": { + "the_avg": { + "avg": { "field": "price" } + } + } + } + } +} +``` + +... returns: + +```console-result +{ + ... 
+ "aggregations": { + "my_buckets": { + "after_key": { + "date": 1494201600000, + "product": "rocky" + }, + "buckets": [ + { + "key": { + "date": 1494460800000, + "product": "apocalypse now" + }, + "doc_count": 1, + "the_avg": { + "value": 10.0 + } + }, + { + "key": { + "date": 1494374400000, + "product": "mad max" + }, + "doc_count": 1, + "the_avg": { + "value": 27.0 + } + }, + { + "key": { + "date": 1494288000000, + "product": "mad max" + }, + "doc_count": 2, + "the_avg": { + "value": 22.5 + } + }, + { + "key": { + "date": 1494201600000, + "product": "rocky" + }, + "doc_count": 1, + "the_avg": { + "value": 10.0 + } + } + ] + } + } +} +``` + + +## Pipeline aggregations [search-aggregations-bucket-composite-aggregation-pipeline-aggregations] + +The composite agg is not currently compatible with pipeline aggregations, nor does it make sense in most cases. E.g. due to the paging nature of composite aggs, a single logical partition (one day for example) might be spread over multiple pages. Since pipeline aggregations are purely post-processing on the final list of buckets, running something like a derivative on a composite page could lead to inaccurate results as it is only taking into account a "partial" result on that page. + +Pipeline aggs that are self contained to a single bucket (such as `bucket_selector`) might be supported in the future. diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-correlation-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-correlation-aggregation.md new file mode 100644 index 0000000000000..33b71364f174d --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-correlation-aggregation.md @@ -0,0 +1,300 @@ +--- +navigation_title: "Bucket correlation" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-correlation-aggregation.html +--- + +# Bucket correlation aggregation [search-aggregations-bucket-correlation-aggregation] + + +A sibling pipeline aggregation which executes a correlation function on the configured sibling multi-bucket aggregation. + +## Parameters [bucket-correlation-agg-syntax] + +`buckets_path` +: (Required, string) Path to the buckets that contain one set of values to correlate. For syntax, see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax). + +`function` +: (Required, object) The correlation function to execute. + + ::::{dropdown} Properties of `function` + `count_correlation` + : (Required*, object) The configuration to calculate a count correlation. This function is designed for determining the correlation of a term value and a given metric. Consequently, it needs to meet the following requirements. + + * The `buckets_path` must point to a `_count` metric. + * The total count of all the `bucket_path` count values must be less than or equal to `indicator.doc_count`. + * When utilizing this function, an initial calculation to gather the required `indicator` values is required. + + ::::{dropdown} Properties of `count_correlation` + `indicator` + : (Required, object) The indicator with which to correlate the configured `bucket_path` values. + + ::::{dropdown} Properties of `indicator` + `doc_count` + : (Required, integer) The total number of documents that initially created the `expectations`. 
It’s required to be greater than or equal to the sum of all values in the `buckets_path` as this is the originating superset of data to which the term values are correlated. + + `expectations` + : (Required, array) An array of numbers with which to correlate the configured `bucket_path` values. The length of this value must always equal the number of buckets returned by the `bucket_path`. + + `fractions` + : (Optional, array) An array of fractions to use when averaging and calculating variance. This should be used if the pre-calculated data and the `buckets_path` have known gaps. The length of `fractions`, if provided, must equal `expectations`. + + :::: + + + :::: + + + :::: + + + +## Syntax [_syntax_8] + +A `bucket_correlation` aggregation looks like this in isolation: + +```js +{ + "bucket_correlation": { + "buckets_path": "range_values>_count", <1> + "function": { + "count_correlation": { <2> + "indicator": { + "expectations": [...], + "doc_count": 10000 + } + } + } + } +} +``` + +1. The buckets containing the values to correlate against. +2. The correlation function definition. + + + +## Example [bucket-correlation-agg-example] + +The following snippet correlates the individual terms in the field `version` with the `latency` metric. Not shown is the pre-calculation of the `latency` indicator values, which was done utilizing the [percentiles](/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md) aggregation. + +This example is only using the 10s percentiles. + +```console +POST correlate_latency/_search?size=0&filter_path=aggregations +{ + "aggs": { + "buckets": { + "terms": { <1> + "field": "version", + "size": 2 + }, + "aggs": { + "latency_ranges": { + "range": { <2> + "field": "latency", + "ranges": [ + { "to": 0.0 }, + { "from": 0, "to": 105 }, + { "from": 105, "to": 225 }, + { "from": 225, "to": 445 }, + { "from": 445, "to": 665 }, + { "from": 665, "to": 885 }, + { "from": 885, "to": 1115 }, + { "from": 1115, "to": 1335 }, + { "from": 1335, "to": 1555 }, + { "from": 1555, "to": 1775 }, + { "from": 1775 } + ] + } + }, + "bucket_correlation": { <3> + "bucket_correlation": { + "buckets_path": "latency_ranges>_count", + "function": { + "count_correlation": { + "indicator": { + "expectations": [0, 52.5, 165, 335, 555, 775, 1000, 1225, 1445, 1665, 1775], + "doc_count": 200 + } + } + } + } + } + } + } + } +} +``` + +1. The term buckets containing a range aggregation and the bucket correlation aggregation. Both are utilized to calculate the correlation of the term values with the latency. +2. The range aggregation on the latency field. The ranges were created referencing the percentiles of the latency field. +3. The bucket correlation aggregation that calculates the correlation of the number of term values within each range and the previously calculated indicator values. 
+ + +And the following may be the response: + +```console-result +{ + "aggregations" : { + "buckets" : { + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0, + "buckets" : [ + { + "key" : "1.0", + "doc_count" : 100, + "latency_ranges" : { + "buckets" : [ + { + "key" : "*-0.0", + "to" : 0.0, + "doc_count" : 0 + }, + { + "key" : "0.0-105.0", + "from" : 0.0, + "to" : 105.0, + "doc_count" : 1 + }, + { + "key" : "105.0-225.0", + "from" : 105.0, + "to" : 225.0, + "doc_count" : 9 + }, + { + "key" : "225.0-445.0", + "from" : 225.0, + "to" : 445.0, + "doc_count" : 0 + }, + { + "key" : "445.0-665.0", + "from" : 445.0, + "to" : 665.0, + "doc_count" : 0 + }, + { + "key" : "665.0-885.0", + "from" : 665.0, + "to" : 885.0, + "doc_count" : 0 + }, + { + "key" : "885.0-1115.0", + "from" : 885.0, + "to" : 1115.0, + "doc_count" : 10 + }, + { + "key" : "1115.0-1335.0", + "from" : 1115.0, + "to" : 1335.0, + "doc_count" : 20 + }, + { + "key" : "1335.0-1555.0", + "from" : 1335.0, + "to" : 1555.0, + "doc_count" : 20 + }, + { + "key" : "1555.0-1775.0", + "from" : 1555.0, + "to" : 1775.0, + "doc_count" : 20 + }, + { + "key" : "1775.0-*", + "from" : 1775.0, + "doc_count" : 20 + } + ] + }, + "bucket_correlation" : { + "value" : 0.8402398981360937 + } + }, + { + "key" : "2.0", + "doc_count" : 100, + "latency_ranges" : { + "buckets" : [ + { + "key" : "*-0.0", + "to" : 0.0, + "doc_count" : 0 + }, + { + "key" : "0.0-105.0", + "from" : 0.0, + "to" : 105.0, + "doc_count" : 19 + }, + { + "key" : "105.0-225.0", + "from" : 105.0, + "to" : 225.0, + "doc_count" : 11 + }, + { + "key" : "225.0-445.0", + "from" : 225.0, + "to" : 445.0, + "doc_count" : 20 + }, + { + "key" : "445.0-665.0", + "from" : 445.0, + "to" : 665.0, + "doc_count" : 20 + }, + { + "key" : "665.0-885.0", + "from" : 665.0, + "to" : 885.0, + "doc_count" : 20 + }, + { + "key" : "885.0-1115.0", + "from" : 885.0, + "to" : 1115.0, + "doc_count" : 10 + }, + { + "key" : "1115.0-1335.0", + "from" : 1115.0, + "to" : 1335.0, + "doc_count" : 0 + }, + { + "key" : "1335.0-1555.0", + "from" : 1335.0, + "to" : 1555.0, + "doc_count" : 0 + }, + { + "key" : "1555.0-1775.0", + "from" : 1555.0, + "to" : 1775.0, + "doc_count" : 0 + }, + { + "key" : "1775.0-*", + "from" : 1775.0, + "doc_count" : 0 + } + ] + }, + "bucket_correlation" : { + "value" : -0.5759855613334943 + } + } + ] + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-count-ks-test-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-count-ks-test-aggregation.md new file mode 100644 index 0000000000000..92f3e60dd10ae --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-count-ks-test-aggregation.md @@ -0,0 +1,267 @@ +--- +navigation_title: "Bucket count K-S test" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-count-ks-test-aggregation.html +--- + +# Bucket count K-S test correlation aggregation [search-aggregations-bucket-count-ks-test-aggregation] + + +A sibling pipeline aggregation which executes a two sample Kolmogorov–Smirnov test (referred to as a "K-S test" from now on) against a provided distribution, and the distribution implied by the documents counts in the configured sibling aggregation. 
Specifically, for some metric, assuming that the percentile intervals of the metric are known beforehand or have been computed by an aggregation, one would use a range aggregation for the sibling to compute the p-value of the distribution difference between the metric and the restriction of that metric to a subset of the documents. A natural use case is when the sibling aggregation is a range aggregation nested in a terms aggregation, in which case one compares the overall distribution of the metric to its restriction to each term.
+
+## Parameters [bucket-count-ks-test-agg-syntax]
+
+`buckets_path`
+: (Required, string) Path to the buckets that contain one set of values to correlate. Must be a `_count` path. For syntax, see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax).
+
+`alternative`
+: (Optional, list) A list of string values indicating which K-S test alternative to calculate. The valid values are: "greater", "less", "two_sided". This parameter is key for determining the K-S statistic used when calculating the K-S test. The default is to calculate all possible alternative hypotheses.
+
+`fractions`
+: (Optional, list) A list of doubles indicating the distribution of the samples to compare the `buckets_path` results against. In typical usage this is the overall proportion of documents in each bucket, which is compared with the actual document proportions in each bucket from the sibling aggregation counts. The default is to assume that overall documents are uniformly distributed on these buckets, which they would be if one used equal percentiles of a metric to define the bucket end points.
+
+`sampling_method`
+: (Optional, string) Indicates the sampling methodology to use when calculating the K-S test. Note that this samples the returned values and determines the cumulative distribution function (CDF) points used when comparing the two samples. The default is `upper_tail`, which emphasizes the upper end of the CDF points. Valid options are: `upper_tail`, `uniform`, and `lower_tail`.
+
+
+## Syntax [_syntax_7]
+
+A `bucket_count_ks_test` aggregation looks like this in isolation:
+
+```js
+{
+  "bucket_count_ks_test": {
+    "buckets_path": "range_values>_count", <1>
+    "alternative": ["less", "greater", "two_sided"], <2>
+    "sampling_method": "upper_tail" <3>
+  }
+}
+```
+
+1. The buckets containing the values to test against.
+2. The alternatives to calculate.
+3. The sampling method for the K-S statistic.
+
+
+
+## Example [bucket-count-ks-test-agg-example]
+
+The following snippet runs the `bucket_count_ks_test` on the individual terms in the field `version` against a uniform distribution. The uniform distribution reflects the `latency` percentile buckets. Not shown is the pre-calculation of the `latency` indicator values, which was done using the [percentiles](/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md) aggregation.
+
+This example is only using the deciles of `latency`.
+ +```console +POST correlate_latency/_search?size=0&filter_path=aggregations +{ + "aggs": { + "buckets": { + "terms": { <1> + "field": "version", + "size": 2 + }, + "aggs": { + "latency_ranges": { + "range": { <2> + "field": "latency", + "ranges": [ + { "to": 0 }, + { "from": 0, "to": 105 }, + { "from": 105, "to": 225 }, + { "from": 225, "to": 445 }, + { "from": 445, "to": 665 }, + { "from": 665, "to": 885 }, + { "from": 885, "to": 1115 }, + { "from": 1115, "to": 1335 }, + { "from": 1335, "to": 1555 }, + { "from": 1555, "to": 1775 }, + { "from": 1775 } + ] + } + }, + "ks_test": { <3> + "bucket_count_ks_test": { + "buckets_path": "latency_ranges>_count", + "alternative": ["less", "greater", "two_sided"] + } + } + } + } + } +} +``` + +1. The term buckets containing a range aggregation and the bucket correlation aggregation. Both are utilized to calculate the correlation of the term values with the latency. +2. The range aggregation on the latency field. The ranges were created referencing the percentiles of the latency field. +3. The bucket count K-S test aggregation that tests if the bucket counts comes from the same distribution as `fractions`; where `fractions` is a uniform distribution. + + +And the following may be the response: + +```console-result +{ + "aggregations" : { + "buckets" : { + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0, + "buckets" : [ + { + "key" : "1.0", + "doc_count" : 100, + "latency_ranges" : { + "buckets" : [ + { + "key" : "*-0.0", + "to" : 0.0, + "doc_count" : 0 + }, + { + "key" : "0.0-105.0", + "from" : 0.0, + "to" : 105.0, + "doc_count" : 1 + }, + { + "key" : "105.0-225.0", + "from" : 105.0, + "to" : 225.0, + "doc_count" : 9 + }, + { + "key" : "225.0-445.0", + "from" : 225.0, + "to" : 445.0, + "doc_count" : 0 + }, + { + "key" : "445.0-665.0", + "from" : 445.0, + "to" : 665.0, + "doc_count" : 0 + }, + { + "key" : "665.0-885.0", + "from" : 665.0, + "to" : 885.0, + "doc_count" : 0 + }, + { + "key" : "885.0-1115.0", + "from" : 885.0, + "to" : 1115.0, + "doc_count" : 10 + }, + { + "key" : "1115.0-1335.0", + "from" : 1115.0, + "to" : 1335.0, + "doc_count" : 20 + }, + { + "key" : "1335.0-1555.0", + "from" : 1335.0, + "to" : 1555.0, + "doc_count" : 20 + }, + { + "key" : "1555.0-1775.0", + "from" : 1555.0, + "to" : 1775.0, + "doc_count" : 20 + }, + { + "key" : "1775.0-*", + "from" : 1775.0, + "doc_count" : 20 + } + ] + }, + "ks_test" : { + "less" : 2.248673241788478E-4, + "greater" : 1.0, + "two_sided" : 5.791639181800257E-4 + } + }, + { + "key" : "2.0", + "doc_count" : 100, + "latency_ranges" : { + "buckets" : [ + { + "key" : "*-0.0", + "to" : 0.0, + "doc_count" : 0 + }, + { + "key" : "0.0-105.0", + "from" : 0.0, + "to" : 105.0, + "doc_count" : 19 + }, + { + "key" : "105.0-225.0", + "from" : 105.0, + "to" : 225.0, + "doc_count" : 11 + }, + { + "key" : "225.0-445.0", + "from" : 225.0, + "to" : 445.0, + "doc_count" : 20 + }, + { + "key" : "445.0-665.0", + "from" : 445.0, + "to" : 665.0, + "doc_count" : 20 + }, + { + "key" : "665.0-885.0", + "from" : 665.0, + "to" : 885.0, + "doc_count" : 20 + }, + { + "key" : "885.0-1115.0", + "from" : 885.0, + "to" : 1115.0, + "doc_count" : 10 + }, + { + "key" : "1115.0-1335.0", + "from" : 1115.0, + "to" : 1335.0, + "doc_count" : 0 + }, + { + "key" : "1335.0-1555.0", + "from" : 1335.0, + "to" : 1555.0, + "doc_count" : 0 + }, + { + "key" : "1555.0-1775.0", + "from" : 1555.0, + "to" : 1775.0, + "doc_count" : 0 + }, + { + "key" : "1775.0-*", + "from" : 1775.0, + "doc_count" : 0 + } + ] + }, + "ks_test" : { + "less" : 
0.9642895789647244, + "greater" : 4.58718174664754E-9, + "two_sided" : 5.916656831139733E-9 + } + } + ] + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-datehistogram-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-datehistogram-aggregation.md new file mode 100644 index 0000000000000..590502c5ce3a0 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-datehistogram-aggregation.md @@ -0,0 +1,685 @@ +--- +navigation_title: "Date histogram" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html +--- + +# Date histogram aggregation [search-aggregations-bucket-datehistogram-aggregation] + + +This multi-bucket aggregation is similar to the normal [histogram](/reference/data-analysis/aggregations/search-aggregations-bucket-histogram-aggregation.md), but it can only be used with date or date range values. Because dates are represented internally in Elasticsearch as long values, it is possible, but not as accurate, to use the normal `histogram` on dates as well. The main difference in the two APIs is that here the interval can be specified using date/time expressions. Time-based data requires special support because time-based intervals are not always a fixed length. + +Like the histogram, values are rounded **down** into the closest bucket. For example, if the interval is a calendar day, `2020-01-03T07:00:01Z` is rounded to `2020-01-03T00:00:00Z`. Values are rounded as follows: + +```java +bucket_key = Math.floor(value / interval) * interval +``` + +## Calendar and fixed intervals [calendar_and_fixed_intervals] + +When configuring a date histogram aggregation, the interval can be specified in two manners: calendar-aware time intervals, and fixed time intervals. + +Calendar-aware intervals understand that daylight savings changes the length of specific days, months have different amounts of days, and leap seconds can be tacked onto a particular year. + +Fixed intervals are, by contrast, always multiples of SI units and do not change based on calendaring context. + + +## Calendar intervals [calendar_intervals] + +Calendar-aware intervals are configured with the `calendar_interval` parameter. You can specify calendar intervals using the unit name, such as `month`, or as a single unit quantity, such as `1M`. For example, `day` and `1d` are equivalent. Multiple quantities, such as `2d`, are not supported. + +The accepted calendar intervals are: + +`minute`, `1m` +: All minutes begin at 00 seconds. One minute is the interval between 00 seconds of the first minute and 00 seconds of the following minute in the specified time zone, compensating for any intervening leap seconds, so that the number of minutes and seconds past the hour is the same at the start and end. + +`hour`, `1h` +: All hours begin at 00 minutes and 00 seconds. One hour (1h) is the interval between 00:00 minutes of the first hour and 00:00 minutes of the following hour in the specified time zone, compensating for any intervening leap seconds, so that the number of minutes and seconds past the hour is the same at the start and end. + +`day`, `1d` +: All days begin at the earliest possible time, which is usually 00:00:00 (midnight). One day (1d) is the interval between the start of the day and the start of the following day in the specified time zone, compensating for any intervening time changes. 
+ +`week`, `1w` +: One week is the interval between the start day_of_week:hour:minute:second and the same day of the week and time of the following week in the specified time zone. + +`month`, `1M` +: One month is the interval between the start day of the month and time of day and the same day of the month and time of the following month in the specified time zone, so that the day of the month and time of day are the same at the start and end. Note that the day may differ if an [`offset` is used that is longer than a month](#search-aggregations-bucket-datehistogram-offset-months). + +`quarter`, `1q` +: One quarter is the interval between the start day of the month and time of day and the same day of the month and time of day three months later, so that the day of the month and time of day are the same at the start and end.
+ +`year`, `1y` +: One year is the interval between the start day of the month and time of day and the same day of the month and time of day the following year in the specified time zone, so that the date and time are the same at the start and end.
+ +### Calendar interval examples [calendar_interval_examples] + +As an example, here is an aggregation requesting bucket intervals of a month in calendar time: + +$$$datehistogram-aggregation-calendar-interval-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "sales_over_time": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + } + } + } +} +``` + +If you attempt to use multiples of calendar units, the aggregation will fail because only singular calendar units are supported: + +$$$datehistogram-aggregation-calendar-interval-multiples-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "sales_over_time": { + "date_histogram": { + "field": "date", + "calendar_interval": "2d" + } + } + } +} +``` + +```js +{ + "error" : { + "root_cause" : [...], + "type" : "x_content_parse_exception", + "reason" : "[1:82] [date_histogram] failed to parse field [calendar_interval]", + "caused_by" : { + "type" : "illegal_argument_exception", + "reason" : "The supplied interval [2d] could not be parsed as a calendar interval.", + "stack_trace" : "java.lang.IllegalArgumentException: The supplied interval [2d] could not be parsed as a calendar interval." + } + } +} +``` + + + +## Fixed intervals [fixed_intervals] + +Fixed intervals are configured with the `fixed_interval` parameter. + +In contrast to calendar-aware intervals, fixed intervals are a fixed number of SI units and never deviate, regardless of where they fall on the calendar. One second is always composed of `1000ms`. This allows fixed intervals to be specified in any multiple of the supported units. + +However, it means fixed intervals cannot express other units such as months, since the duration of a month is not a fixed quantity. Attempting to specify a calendar interval like month or quarter will throw an exception. + +The accepted units for fixed intervals are: + +milliseconds (`ms`) +: A single millisecond. This is a very, very small interval. + +seconds (`s`) +: Defined as 1000 milliseconds each. + +minutes (`m`) +: Defined as 60 seconds each (60,000 milliseconds). All minutes begin at 00 seconds. + +hours (`h`) +: Defined as 60 minutes each (3,600,000 milliseconds). All hours begin at 00 minutes and 00 seconds. + +days (`d`) +: Defined as 24 hours (86,400,000 milliseconds). All days begin at the earliest possible time, which is usually 00:00:00 (midnight). 
+ +### Fixed interval examples [fixed_interval_examples] + +If we try to recreate the "month" `calendar_interval` from earlier, we can approximate that with 30 fixed days: + +$$$datehistogram-aggregation-fixed-interval-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "sales_over_time": { + "date_histogram": { + "field": "date", + "fixed_interval": "30d" + } + } + } +} +``` + +But if we try to use a calendar unit that is not supported, such as weeks, we’ll get an exception: + +$$$datehistogram-aggregation-fixed-interval-unsupported-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "sales_over_time": { + "date_histogram": { + "field": "date", + "fixed_interval": "2w" + } + } + } +} +``` + +```js +{ + "error" : { + "root_cause" : [...], + "type" : "x_content_parse_exception", + "reason" : "[1:82] [date_histogram] failed to parse field [fixed_interval]", + "caused_by" : { + "type" : "illegal_argument_exception", + "reason" : "failed to parse setting [date_histogram.fixedInterval] with value [2w] as a time value: unit is missing or unrecognized", + "stack_trace" : "java.lang.IllegalArgumentException: failed to parse setting [date_histogram.fixedInterval] with value [2w] as a time value: unit is missing or unrecognized" + } + } +} +``` + + + +## Date histogram usage notes [datehistogram-aggregation-notes] + +In all cases, when the specified end time does not exist, the actual end time is the closest available time after the specified end. + +Widely distributed applications must also consider vagaries such as countries that start and stop daylight savings time at 12:01 A.M., so end up with one minute of Sunday followed by an additional 59 minutes of Saturday once a year, and countries that decide to move across the international date line. Situations like that can make irregular time zone offsets seem easy. + +As always, rigorous testing, especially around time-change events, will ensure that your time interval specification is what you intend it to be. + +::::{warning} +To avoid unexpected results, all connected servers and clients must sync to a reliable network time service. +:::: + + +::::{note} +Fractional time values are not supported, but you can address this by shifting to another time unit (e.g., `1.5h` could instead be specified as `90m`). +:::: + + +::::{note} +You can also specify time values using abbreviations supported by [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units) parsing. +:::: + + + +## Keys [datehistogram-aggregation-keys] + +Internally, a date is represented as a 64 bit number representing a timestamp in milliseconds-since-the-epoch (01/01/1970 midnight UTC). These timestamps are returned as the `key` name of the bucket. The `key_as_string` is the same timestamp converted to a formatted date string using the `format` parameter specification: + +::::{tip} +If you don’t specify `format`, the first date [format](/reference/elasticsearch/mapping-reference/mapping-date-format.md) specified in the field mapping is used. +:::: + + +$$$datehistogram-aggregation-format-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "sales_over_time": { + "date_histogram": { + "field": "date", + "calendar_interval": "1M", + "format": "yyyy-MM-dd" <1> + } + } + } +} +``` + +1. Supports expressive date [format pattern](/reference/data-analysis/aggregations/search-aggregations-bucket-daterange-aggregation.md#date-format-pattern) + + +Response: + +```console-result +{ + ... 
+ "aggregations": { + "sales_over_time": { + "buckets": [ + { + "key_as_string": "2015-01-01", + "key": 1420070400000, + "doc_count": 3 + }, + { + "key_as_string": "2015-02-01", + "key": 1422748800000, + "doc_count": 2 + }, + { + "key_as_string": "2015-03-01", + "key": 1425168000000, + "doc_count": 2 + } + ] + } + } +} +``` + + +## Time zone [datehistogram-aggregation-time-zone] + +{{es}} stores date-times in Coordinated Universal Time (UTC). By default, all bucketing and rounding is also done in UTC. Use the `time_zone` parameter to indicate that bucketing should use a different time zone. + +When you specify a time zone, the following logic is used to determine the bucket the document belongs in: + +```java +bucket_key = localToUtc(Math.floor(utcToLocal(value) / interval) * interval)) +``` + +For example, if the interval is a calendar day and the time zone is `America/New_York`, then the date value `2020-01-03T01:00:01Z` is processed as follows: + +1. Converted to EST: `2020-01-02T20:00:01` +2. Rounded down to the nearest interval: `2020-01-02T00:00:00` +3. Converted back to UTC: `2020-01-02T05:00:00:00Z` + +When a `key_as_string` is generated for the bucket, the key value is stored in `America/New_York` time, so it’ll display as `"2020-01-02T00:00:00"`. + +You can specify time zones as an ISO 8601 UTC offset, such as `+01:00` or `-08:00`, or as an IANA time zone ID, such as `America/Los_Angeles`. + +Consider the following example: + +$$$datehistogram-aggregation-timezone-example$$$ + +```console +PUT my-index-000001/_doc/1?refresh +{ + "date": "2015-10-01T00:30:00Z" +} + +PUT my-index-000001/_doc/2?refresh +{ + "date": "2015-10-01T01:30:00Z" +} + +GET my-index-000001/_search?size=0 +{ + "aggs": { + "by_day": { + "date_histogram": { + "field": "date", + "calendar_interval": "day" + } + } + } +} +``` + +If you don’t specify a time zone, UTC is used. This would result in both of these documents being placed into the same day bucket, which starts at midnight UTC on 1 October 2015: + +```console-result +{ + ... + "aggregations": { + "by_day": { + "buckets": [ + { + "key_as_string": "2015-10-01T00:00:00.000Z", + "key": 1443657600000, + "doc_count": 2 + } + ] + } + } +} +``` + +If you specify a `time_zone` of `-01:00`, midnight in that time zone is one hour before midnight UTC: + +```console +GET my-index-000001/_search?size=0 +{ + "aggs": { + "by_day": { + "date_histogram": { + "field": "date", + "calendar_interval": "day", + "time_zone": "-01:00" + } + } + } +} +``` + +Now the first document falls into the bucket for 30 September 2015, while the second document falls into the bucket for 1 October 2015: + +```console-result +{ + ... + "aggregations": { + "by_day": { + "buckets": [ + { + "key_as_string": "2015-09-30T00:00:00.000-01:00", <1> + "key": 1443574800000, + "doc_count": 1 + }, + { + "key_as_string": "2015-10-01T00:00:00.000-01:00", <1> + "key": 1443661200000, + "doc_count": 1 + } + ] + } + } +} +``` + +1. The `key_as_string` value represents midnight on each day in the specified time zone. + + +::::{warning} +Many time zones shift their clocks for daylight savings time. Buckets close to the moment when those changes happen can have slightly different sizes than you would expect from the `calendar_interval` or `fixed_interval`. For example, consider a DST start in the `CET` time zone: on 27 March 2016 at 2am, clocks were turned forward 1 hour to 3am local time. 
If you use `day` as the `calendar_interval`, the bucket covering that day will only hold data for 23 hours instead of the usual 24 hours for other buckets. The same is true for shorter intervals, like a `fixed_interval` of `12h`, where you’ll have only a 11h bucket on the morning of 27 March when the DST shift happens. +:::: + + + +## Offset [search-aggregations-bucket-datehistogram-offset] + +Use the `offset` parameter to change the start value of each bucket by the specified positive (`+`) or negative offset (`-`) duration, such as `1h` for an hour, or `1d` for a day. See [Time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units) for more possible time duration options. + +For example, when using an interval of `day`, each bucket runs from midnight to midnight. Setting the `offset` parameter to `+6h` changes each bucket to run from 6am to 6am: + +$$$datehistogram-aggregation-offset-example$$$ + +```console +PUT my-index-000001/_doc/1?refresh +{ + "date": "2015-10-01T05:30:00Z" +} + +PUT my-index-000001/_doc/2?refresh +{ + "date": "2015-10-01T06:30:00Z" +} + +GET my-index-000001/_search?size=0 +{ + "aggs": { + "by_day": { + "date_histogram": { + "field": "date", + "calendar_interval": "day", + "offset": "+6h" + } + } + } +} +``` + +Instead of a single bucket starting at midnight, the above request groups the documents into buckets starting at 6am: + +```console-result +{ + ... + "aggregations": { + "by_day": { + "buckets": [ + { + "key_as_string": "2015-09-30T06:00:00.000Z", + "key": 1443592800000, + "doc_count": 1 + }, + { + "key_as_string": "2015-10-01T06:00:00.000Z", + "key": 1443679200000, + "doc_count": 1 + } + ] + } + } +} +``` + +::::{note} +The start `offset` of each bucket is calculated after `time_zone` adjustments have been made. +:::: + + +### Long offsets over calendar intervals [search-aggregations-bucket-datehistogram-offset-months] + +It is typical to use offsets in units smaller than the `calendar_interval`. For example, using offsets in hours when the interval is days, or an offset of days when the interval is months. If the calendar interval is always of a standard length, or the `offset` is less than one unit of the calendar interval (for example less than `+24h` for `days` or less than `+28d` for months), then each bucket will have a repeating start. For example `+6h` for `days` will result in all buckets starting at 6am each day. However, `+30h` will also result in buckets starting at 6am, except when crossing days that change from standard to summer-savings time or vice-versa. + +This situation is much more pronounced for months, where each month has a different length to at least one of its adjacent months. To demonstrate this, consider eight documents each with a date field on the 20th day of each of the eight months from January to August of 2022. + +When querying for a date histogram over the calendar interval of months, the response will return one bucket per month, each with a single document. Each bucket will have a key named after the first day of the month, plus any offset. For example, the offset of `+19d` will result in buckets with names like `2022-01-20`. 
+ +$$$datehistogram-aggregation-offset-example-19d$$$ + +```console +"buckets": [ + { "key_as_string": "2022-01-20", "key": 1642636800000, "doc_count": 1 }, + { "key_as_string": "2022-02-20", "key": 1645315200000, "doc_count": 1 }, + { "key_as_string": "2022-03-20", "key": 1647734400000, "doc_count": 1 }, + { "key_as_string": "2022-04-20", "key": 1650412800000, "doc_count": 1 }, + { "key_as_string": "2022-05-20", "key": 1653004800000, "doc_count": 1 }, + { "key_as_string": "2022-06-20", "key": 1655683200000, "doc_count": 1 }, + { "key_as_string": "2022-07-20", "key": 1658275200000, "doc_count": 1 }, + { "key_as_string": "2022-08-20", "key": 1660953600000, "doc_count": 1 } +] +``` + +Increasing the offset to `+20d`, each document will appear in a bucket for the previous month, with all bucket keys ending with the same day of the month, as normal. However, further increasing to `+28d`, what used to be a February bucket has now become `"2022-03-01"`. + +$$$datehistogram-aggregation-offset-example-28d$$$ + +```console +"buckets": [ + { "key_as_string": "2021-12-29", "key": 1640736000000, "doc_count": 1 }, + { "key_as_string": "2022-01-29", "key": 1643414400000, "doc_count": 1 }, + { "key_as_string": "2022-03-01", "key": 1646092800000, "doc_count": 1 }, + { "key_as_string": "2022-03-29", "key": 1648512000000, "doc_count": 1 }, + { "key_as_string": "2022-04-29", "key": 1651190400000, "doc_count": 1 }, + { "key_as_string": "2022-05-29", "key": 1653782400000, "doc_count": 1 }, + { "key_as_string": "2022-06-29", "key": 1656460800000, "doc_count": 1 }, + { "key_as_string": "2022-07-29", "key": 1659052800000, "doc_count": 1 } +] +``` + +If we continue to increase the offset, the 30-day months will also shift into the next month, so that 3 of the 8 buckets have different days than the other five. In fact if we keep going, we will find cases where two documents appear in the same month. Documents that were originally 30 days apart can be shifted into the same 31-day month bucket. + +For example, for `+50d` we see: + +$$$datehistogram-aggregation-offset-example-50d$$$ + +```console +"buckets": [ + { "key_as_string": "2022-01-20", "key": 1642636800000, "doc_count": 1 }, + { "key_as_string": "2022-02-20", "key": 1645315200000, "doc_count": 2 }, + { "key_as_string": "2022-04-20", "key": 1650412800000, "doc_count": 2 }, + { "key_as_string": "2022-06-20", "key": 1655683200000, "doc_count": 2 }, + { "key_as_string": "2022-08-20", "key": 1660953600000, "doc_count": 1 } +] +``` + +It is therefore always important when using `offset` with `calendar_interval` bucket sizes to understand the consequences of using offsets larger than the interval size. + +More examples: + +* If the goal is to, for example, have an annual histogram where each year starts on the 5th February, you could use `calendar_interval` of `year` and `offset` of `+33d`, and each year will be shifted identically, because the offset includes only January, which is the same length every year. However, if the goal is to have the year start on the 5th March instead, this technique will not work because the offset includes February, which changes length every four years. +* If you want a quarterly histogram starting on a date within the first month of the year, it will work, but as soon as you push the start date into the second month by having an offset longer than a month, the quarters will all start on different dates. 
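+As a sketch of the first case above (the `sales` index and `date` field follow the earlier examples; the aggregation name is just a placeholder), such a request could look like this:
+
+```console
+POST /sales/_search?size=0
+{
+  "aggs": {
+    "sales_per_shifted_year": {
+      "date_histogram": {
+        "field": "date",
+        "calendar_interval": "year",
+        "offset": "+33d" <1>
+      }
+    }
+  }
+}
+```
+
+1. The offset only spans days in January, so every yearly bucket starts on the same calendar day each year.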
+ + + +## Keyed response [date-histogram-keyed-response] + +Setting the `keyed` flag to `true` associates a unique string key with each bucket and returns the ranges as a hash rather than an array: + +$$$datehistogram-aggregation-keyed-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "sales_over_time": { + "date_histogram": { + "field": "date", + "calendar_interval": "1M", + "format": "yyyy-MM-dd", + "keyed": true + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "sales_over_time": { + "buckets": { + "2015-01-01": { + "key_as_string": "2015-01-01", + "key": 1420070400000, + "doc_count": 3 + }, + "2015-02-01": { + "key_as_string": "2015-02-01", + "key": 1422748800000, + "doc_count": 2 + }, + "2015-03-01": { + "key_as_string": "2015-03-01", + "key": 1425168000000, + "doc_count": 2 + } + } + } + } +} +``` + + +## Scripts [date-histogram-scripts] + +If the data in your documents doesn’t exactly match what you’d like to aggregate, use a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md) . For example, if the revenue for promoted sales should be recognized a day after the sale date: + +$$$datehistogram-aggregation-runtime-field$$$ + +```console +POST /sales/_search?size=0 +{ + "runtime_mappings": { + "date.promoted_is_tomorrow": { + "type": "date", + "script": """ + long date = doc['date'].value.toInstant().toEpochMilli(); + if (doc['promoted'].value) { + date += 86400; + } + emit(date); + """ + } + }, + "aggs": { + "sales_over_time": { + "date_histogram": { + "field": "date.promoted_is_tomorrow", + "calendar_interval": "1M" + } + } + } +} +``` + + +## Parameters [date-histogram-params] + +You can control the order of the returned buckets using the `order` settings and filter the returned buckets based on a `min_doc_count` setting (by default all buckets between the first bucket that matches documents and the last one are returned). This histogram also supports the `extended_bounds` setting, which enables extending the bounds of the histogram beyond the data itself, and `hard_bounds` that limits the histogram to specified bounds. For more information, see [`Extended Bounds`](/reference/data-analysis/aggregations/search-aggregations-bucket-histogram-aggregation.md#search-aggregations-bucket-histogram-aggregation-extended-bounds) and [`Hard Bounds`](/reference/data-analysis/aggregations/search-aggregations-bucket-histogram-aggregation.md#search-aggregations-bucket-histogram-aggregation-hard-bounds). + +### Missing value [date-histogram-missing-value] + +The `missing` parameter defines how to treat documents that are missing a value. By default, they are ignored, but it is also possible to treat them as if they have a value. + +$$$datehistogram-aggregation-missing-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "sale_date": { + "date_histogram": { + "field": "date", + "calendar_interval": "year", + "missing": "2000/01/01" <1> + } + } + } +} +``` + +1. Documents without a value in the `date` field will fall into the same bucket as documents that have the value `2000-01-01`. + + + +### Order [date-histogram-order] + +By default the returned buckets are sorted by their `key` ascending, but you can control the order using the `order` setting. This setting supports the same `order` functionality as [`Terms Aggregation`](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md#search-aggregations-bucket-terms-aggregation-order). 
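+For example, here is a minimal sketch (reusing the `sales` data from the earlier examples) that returns the monthly buckets with the highest document count first, using the same `order` syntax as the terms aggregation:
+
+```console
+POST /sales/_search?size=0
+{
+  "aggs": {
+    "sales_over_time": {
+      "date_histogram": {
+        "field": "date",
+        "calendar_interval": "1M",
+        "order": { "_count": "desc" } <1>
+      }
+    }
+  }
+}
+```
+
+1. Sorts the buckets by their document count in descending order; `{ "_key": "desc" }` would sort by the bucket's date key instead.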
+ + +### Using a script to aggregate by day of the week [date-histogram-aggregate-scripts] + +When you need to aggregate the results by day of the week, run a `terms` aggregation on a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md) that returns the day of the week: + +$$$datehistogram-aggregation-day-of-week-runtime-field$$$ + +```console +POST /sales/_search?size=0 +{ + "runtime_mappings": { + "date.day_of_week": { + "type": "keyword", + "script": "emit(doc['date'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH))" + } + }, + "aggs": { + "day_of_week": { + "terms": { "field": "date.day_of_week" } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "day_of_week": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "Sunday", + "doc_count": 4 + }, + { + "key": "Thursday", + "doc_count": 3 + } + ] + } + } +} +``` + +The response will contain all the buckets having the relative day of the week as key : 1 for Monday, 2 for Tuesday…​ 7 for Sunday. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-daterange-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-daterange-aggregation.md new file mode 100644 index 0000000000000..5cb2485903f43 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-daterange-aggregation.md @@ -0,0 +1,330 @@ +--- +navigation_title: "Date range" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-daterange-aggregation.html +--- + +# Date range aggregation [search-aggregations-bucket-daterange-aggregation] + + +A range aggregation that is dedicated for date values. The main difference between this aggregation and the normal [range](/reference/data-analysis/aggregations/search-aggregations-bucket-range-aggregation.md) aggregation is that the `from` and `to` values can be expressed in [Date Math](/reference/elasticsearch/rest-apis/common-options.md#date-math) expressions, and it is also possible to specify a date format by which the `from` and `to` response fields will be returned. Note that this aggregation includes the `from` value and excludes the `to` value for each range. + +Example: + +$$$daterange-aggregation-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "range": { + "date_range": { + "field": "date", + "format": "MM-yyyy", + "ranges": [ + { "to": "now-10M/M" }, <1> + { "from": "now-10M/M" } <2> + ] + } + } + } +} +``` + +1. < now minus 10 months, rounded down to the start of the month. +2. >= now minus 10 months, rounded down to the start of the month. + + +In the example above, we created two range buckets, the first will "bucket" all documents dated prior to 10 months ago and the second will "bucket" all documents dated since 10 months ago + +Response: + +```console-result +{ + ... + "aggregations": { + "range": { + "buckets": [ + { + "to": 1.4436576E12, + "to_as_string": "10-2015", + "doc_count": 7, + "key": "*-10-2015" + }, + { + "from": 1.4436576E12, + "from_as_string": "10-2015", + "doc_count": 0, + "key": "10-2015-*" + } + ] + } + } +} +``` + +::::{warning} +If a format or date value is incomplete, the date range aggregation replaces any missing components with default values. See [Missing date components](/reference/query-languages/query-dsl-range-query.md#missing-date-components). 
+::::
+
+
+## Missing Values [_missing_values_2]
+
+The `missing` parameter defines how documents that are missing a value should be treated. By default, they will be ignored, but it is also possible to treat them as if they had a value. This is done by adding a set of `fieldname: value` mappings to specify default values per field.
+
+$$$daterange-aggregation-missing-example$$$
+
+```console
+POST /sales/_search?size=0
+{
+  "aggs": {
+    "range": {
+      "date_range": {
+        "field": "date",
+        "missing": "1976/11/30",
+        "ranges": [
+          {
+            "key": "Older",
+            "to": "2016/02/01"
+          }, <1>
+          {
+            "key": "Newer",
+            "from": "2016/02/01",
+            "to" : "now/d"
+          }
+        ]
+      }
+    }
+  }
+}
+```
+
+1. Documents without a value in the `date` field will be added to the "Older" bucket, as if they had a date value of "1976-11-30".
+
+
+
+## Date Format/Pattern [date-format-pattern]
+
+::::{note}
+This information was copied from [DateTimeFormatter](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html).
+::::
+
+
+All ASCII letters are reserved as format pattern letters, which are defined as follows:
+
+| Symbol | Meaning | Presentation | Examples |
+| --- | --- | --- | --- |
+| G | era | text | AD; Anno Domini; A |
+| u | year | year | 2004; 04 |
+| y | year-of-era | year | 2004; 04 |
+| D | day-of-year | number | 189 |
+| M/L | month-of-year | number/text | 7; 07; Jul; July; J |
+| d | day-of-month | number | 10 |
+| Q/q | quarter-of-year | number/text | 3; 03; Q3; 3rd quarter |
+| Y | week-based-year | year | 1996; 96 |
+| w | week-of-week-based-year | number | 27 |
+| W | week-of-month | number | 4 |
+| E | day-of-week | text | Tue; Tuesday; T |
+| e/c | localized day-of-week | number/text | 2; 02; Tue; Tuesday; T |
+| F | week-of-month | number | 3 |
+| a | am-pm-of-day | text | PM |
+| h | clock-hour-of-am-pm (1-12) | number | 12 |
+| K | hour-of-am-pm (0-11) | number | 0 |
+| k | clock-hour-of-day (1-24) | number | 24 |
+| H | hour-of-day (0-23) | number | 0 |
+| m | minute-of-hour | number | 30 |
+| s | second-of-minute | number | 55 |
+| S | fraction-of-second | fraction | 978 |
+| A | milli-of-day | number | 1234 |
+| n | nano-of-second | number | 987654321 |
+| N | nano-of-day | number | 1234000000 |
+| V | time-zone ID | zone-id | America/Los_Angeles; Z; -08:30 |
+| z | time-zone name | zone-name | Pacific Standard Time; PST |
+| O | localized zone-offset | offset-O | GMT+8; GMT+08:00; UTC-08:00; |
+| X | zone-offset *Z* for zero | offset-X | Z; -08; -0830; -08:30; -083015; -08:30:15; |
+| x | zone-offset | offset-x | +0000; -08; -0830; -08:30; -083015; -08:30:15; |
+| Z | zone-offset | offset-Z | +0000; -0800; -08:00; |
+| p | pad next | pad modifier | 1 |
+| ' | escape for text | delimiter |  |
+| '' | single quote | literal | ' |
+| [ | optional section start |  |  |
+| ] | optional section end |  |  |
+| # | reserved for future use |  |  |
+| { | reserved for future use |  |  |
+| } | reserved for future use |  |  |
+
+The count of pattern letters determines the format.
+
+Text
+: The text style is determined based on the number of pattern letters used. Less than 4 pattern letters will use the short form. Exactly 4 pattern letters will use the full form. Exactly 5 pattern letters will use the narrow form. Pattern letters `L`, `c`, and `q` specify the stand-alone form of the text styles.
+
+Number
+: If the count of letters is one, then the value is output using the minimum number of digits and without padding. Otherwise, the count of digits is used as the width of the output field, with the value zero-padded as necessary.
The following pattern letters have constraints on the count of letters. Only one letter of `c` and `F` can be specified. Up to two letters of `d`, `H`, `h`, `K`, `k`, `m`, and `s` can be specified. Up to three letters of `D` can be specified.
+
+Number/Text
+: If the count of pattern letters is 3 or greater, use the Text rules above. Otherwise use the Number rules above.
+
+Fraction
+: Outputs the nano-of-second field as a fraction-of-second. The nano-of-second value has nine digits, thus the count of pattern letters is from 1 to 9. If it is less than 9, then the nano-of-second value is truncated, with only the most significant digits being output.
+
+Year
+: The count of letters determines the minimum field width below which padding is used. If the count of letters is two, then a reduced two digit form is used. For printing, this outputs the rightmost two digits. For parsing, this will parse using the base value of 2000, resulting in a year within the range 2000 to 2099 inclusive. If the count of letters is less than four (but not two), then the sign is only output for negative years as per `SignStyle.NORMAL`. Otherwise, the sign is output if the pad width is exceeded, as per `SignStyle.EXCEEDS_PAD`.
+
+ZoneId
+: This outputs the time-zone ID, such as `Europe/Paris`. If the count of letters is two, then the time-zone ID is output. Any other count of letters throws `IllegalArgumentException`.
+
+Zone names
+: This outputs the display name of the time-zone ID. If the count of letters is one, two or three, then the short name is output. If the count of letters is four, then the full name is output. Five or more letters throws `IllegalArgumentException`.
+
+Offset X and x
+: This formats the offset based on the number of pattern letters. One letter outputs just the hour, such as `+01`, unless the minute is non-zero in which case the minute is also output, such as `+0130`. Two letters outputs the hour and minute, without a colon, such as `+0130`. Three letters outputs the hour and minute, with a colon, such as `+01:30`. Four letters outputs the hour and minute and optional second, without a colon, such as `+013015`. Five letters outputs the hour and minute and optional second, with a colon, such as `+01:30:15`. Six or more letters throws `IllegalArgumentException`. Pattern letter `X` (upper case) will output `Z` when the offset to be output would be zero, whereas pattern letter `x` (lower case) will output `+00`, `+0000`, or `+00:00`.
+
+Offset O
+: This formats the localized offset based on the number of pattern letters. One letter outputs the short form of the localized offset, which is localized offset text, such as `GMT`, with hour without leading zero, optional 2-digit minute and second if non-zero, and colon, for example `GMT+8`. Four letters outputs the full form, which is localized offset text, such as `GMT`, with 2-digit hour and minute field, optional second field if non-zero, and colon, for example `GMT+08:00`. Any other count of letters throws `IllegalArgumentException`.
+
+Offset Z
+: This formats the offset based on the number of pattern letters. One, two or three letters outputs the hour and minute, without a colon, such as `+0130`. The output will be `+0000` when the offset is zero. Four letters outputs the full form of localized offset, equivalent to four letters of Offset-O. The output will be the corresponding localized offset text if the offset is zero. Five letters outputs the hour and minute, with optional second if non-zero, with colon. It outputs `Z` if the offset is zero.
Six or more letters throws IllegalArgumentException. + +Optional section +: The optional section markers work exactly like calling `DateTimeFormatterBuilder.optionalStart()` and `DateTimeFormatterBuilder.optionalEnd()`. + +Pad modifier +: Modifies the pattern that immediately follows to be padded with spaces. The pad width is determined by the number of pattern letters. This is the same as calling `DateTimeFormatterBuilder.padNext(int)`. + +For example, `ppH` outputs the hour-of-day padded on the left with spaces to a width of 2. + +Any unrecognized letter is an error. Any non-letter character, other than `[`, `]`, `{`, `}`, `#` and the single quote will be output directly. Despite this, it is recommended to use single quotes around all characters that you want to output directly to ensure that future changes do not break your application. + + +## Time zone in date range aggregations [time-zones] + +Dates can be converted from another time zone to UTC by specifying the `time_zone` parameter. + +Time zones may either be specified as an ISO 8601 UTC offset (e.g. +01:00 or -08:00) or as one of the time zone ids from the TZ database. + +The `time_zone` parameter is also applied to rounding in date math expressions. As an example, to round to the beginning of the day in the CET time zone, you can do the following: + +$$$daterange-aggregation-timezone-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "range": { + "date_range": { + "field": "date", + "time_zone": "CET", + "ranges": [ + { "to": "2016/02/01" }, <1> + { "from": "2016/02/01", "to" : "now/d" }, <2> + { "from": "now/d" } + ] + } + } + } +} +``` + +1. This date will be converted to `2016-02-01T00:00:00.000+01:00`. +2. `now/d` will be rounded to the beginning of the day in the CET time zone. + + + +## Keyed Response [_keyed_response] + +Setting the `keyed` flag to `true` will associate a unique string key with each bucket and return the ranges as a hash rather than an array: + +$$$daterange-aggregation-keyed-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "range": { + "date_range": { + "field": "date", + "format": "MM-yyy", + "ranges": [ + { "to": "now-10M/M" }, + { "from": "now-10M/M" } + ], + "keyed": true + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "range": { + "buckets": { + "*-10-2015": { + "to": 1.4436576E12, + "to_as_string": "10-2015", + "doc_count": 7 + }, + "10-2015-*": { + "from": 1.4436576E12, + "from_as_string": "10-2015", + "doc_count": 0 + } + } + } + } +} +``` + +It is also possible to customize the key for each range: + +$$$daterange-aggregation-keyed-multiple-keys-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "range": { + "date_range": { + "field": "date", + "format": "MM-yyy", + "ranges": [ + { "from": "01-2015", "to": "03-2015", "key": "quarter_01" }, + { "from": "03-2015", "to": "06-2015", "key": "quarter_02" } + ], + "keyed": true + } + } + } +} +``` + +Response: + +```console-result +{ + ... 
+ "aggregations": { + "range": { + "buckets": { + "quarter_01": { + "from": 1.4200704E12, + "from_as_string": "01-2015", + "to": 1.425168E12, + "to_as_string": "03-2015", + "doc_count": 5 + }, + "quarter_02": { + "from": 1.425168E12, + "from_as_string": "03-2015", + "to": 1.4331168E12, + "to_as_string": "06-2015", + "doc_count": 2 + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-diversified-sampler-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-diversified-sampler-aggregation.md new file mode 100644 index 0000000000000..e8483a65571af --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-diversified-sampler-aggregation.md @@ -0,0 +1,197 @@ +--- +navigation_title: "Diversified sampler" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-diversified-sampler-aggregation.html +--- + +# Diversified sampler aggregation [search-aggregations-bucket-diversified-sampler-aggregation] + + +Like the `sampler` aggregation this is a filtering aggregation used to limit any sub aggregations' processing to a sample of the top-scoring documents. The `diversified_sampler` aggregation adds the ability to limit the number of matches that share a common value such as an "author". + +::::{note} +Any good market researcher will tell you that when working with samples of data it is important that the sample represents a healthy variety of opinions rather than being skewed by any single voice. The same is true with aggregations and sampling with these diversify settings can offer a way to remove the bias in your content (an over-populated geography, a large spike in a timeline or an over-active forum spammer). +:::: + + +* Tightening the focus of analytics to high-relevance matches rather than the potentially very long tail of low-quality matches +* Removing bias from analytics by ensuring fair representation of content from different sources +* Reducing the running cost of aggregations that can produce useful results using only samples e.g. `significant_terms` + +The `field` setting is used to provide values used for de-duplication and the `max_docs_per_value` setting controls the maximum number of documents collected on any one shard which share a common value. The default setting for `max_docs_per_value` is 1. + +The aggregation will throw an error if the `field` produces multiple values for a single document (de-duplication using multi-valued fields is not supported due to efficiency concerns). + +Example: + +We might want to see which tags are strongly associated with `#elasticsearch` on StackOverflow forum posts but ignoring the effects of some prolific users with a tendency to misspell #Kibana as #Cabana. + +$$$diversified-sampler-aggregation-example$$$ + +```console +POST /stackoverflow/_search?size=0 +{ + "query": { + "query_string": { + "query": "tags:elasticsearch" + } + }, + "aggs": { + "my_unbiased_sample": { + "diversified_sampler": { + "shard_size": 200, + "field": "author" + }, + "aggs": { + "keywords": { + "significant_terms": { + "field": "tags", + "exclude": [ "elasticsearch" ] + } + } + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "my_unbiased_sample": { + "doc_count": 151, <1> + "keywords": { <2> + "doc_count": 151, + "bg_count": 650, + "buckets": [ + { + "key": "kibana", + "doc_count": 150, + "score": 2.213, + "bg_count": 200 + } + ] + } + } + } +} +``` + +1. 
151 documents were sampled in total. +2. The results of the significant_terms aggregation are not skewed by any single author’s quirks because we asked for a maximum of one post from any one author in our sample. + + +## Scripted example [_scripted_example] + +In this scenario we might want to diversify on a combination of field values. We can use a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md) to produce a hash of the multiple values in a tags field to ensure we don’t have a sample that consists of the same repeated combinations of tags. + +$$$diversified-sampler-aggregation-runtime-field-example$$$ + +```console +POST /stackoverflow/_search?size=0 +{ + "query": { + "query_string": { + "query": "tags:kibana" + } + }, + "runtime_mappings": { + "tags.hash": { + "type": "long", + "script": "emit(doc['tags'].hashCode())" + } + }, + "aggs": { + "my_unbiased_sample": { + "diversified_sampler": { + "shard_size": 200, + "max_docs_per_value": 3, + "field": "tags.hash" + }, + "aggs": { + "keywords": { + "significant_terms": { + "field": "tags", + "exclude": [ "kibana" ] + } + } + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "my_unbiased_sample": { + "doc_count": 6, + "keywords": { + "doc_count": 6, + "bg_count": 650, + "buckets": [ + { + "key": "logstash", + "doc_count": 3, + "score": 2.213, + "bg_count": 50 + }, + { + "key": "elasticsearch", + "doc_count": 3, + "score": 1.34, + "bg_count": 200 + } + ] + } + } + } +} +``` + + +## shard_size [_shard_size] + +The `shard_size` parameter limits how many top-scoring documents are collected in the sample processed on each shard. The default value is 100. + + +## max_docs_per_value [_max_docs_per_value] + +The `max_docs_per_value` is an optional parameter and limits how many documents are permitted per choice of de-duplicating value. The default setting is "1". + + +## execution_hint [_execution_hint] + +The optional `execution_hint` setting can influence the management of the values used for de-duplication. Each option will hold up to `shard_size` values in memory while performing de-duplication but the type of value held can be controlled as follows: + +* hold field values directly (`map`) +* hold ordinals of the field as determined by the Lucene index (`global_ordinals`) +* hold hashes of the field values - with potential for hash collisions (`bytes_hash`) + +The default setting is to use [`global_ordinals`](/reference/elasticsearch/mapping-reference/eager-global-ordinals.md) if this information is available from the Lucene index and reverting to `map` if not. The `bytes_hash` setting may prove faster in some cases but introduces the possibility of false positives in de-duplication logic due to the possibility of hash collisions. Please note that Elasticsearch will ignore the choice of execution hint if it is not applicable and that there is no backward compatibility guarantee on these hints. + + +## Limitations [_limitations_6] + +### Cannot be nested under `breadth_first` aggregations [div-sampler-breadth-first-nested-agg] + +Being a quality-based filter the diversified_sampler aggregation needs access to the relevance score produced for each document. It therefore cannot be nested under a `terms` aggregation which has the `collect_mode` switched from the default `depth_first` mode to `breadth_first` as this discards scores. In this situation an error will be thrown. + + +### Limited de-dup logic. 
[_limited_de_dup_logic] + +The de-duplication logic applies only at a shard level so will not apply across shards. + + +### No specialized syntax for geo/date fields [spec-syntax-geo-date-fields] + +Currently the syntax for defining the diversifying values is defined by a choice of `field` or `script` - there is no added syntactical sugar for expressing geo or date units such as "7d" (7 days). This support may be added in a later release and users will currently have to create these sorts of values using a script. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-filter-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-filter-aggregation.md new file mode 100644 index 0000000000000..565f58cdba443 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-filter-aggregation.md @@ -0,0 +1,135 @@ +--- +navigation_title: "Filter" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-filter-aggregation.html +--- + +# Filter aggregation [search-aggregations-bucket-filter-aggregation] + + +A single bucket aggregation that narrows the set of documents to those that match a [query](/reference/query-languages/querydsl.md). + +Example: + +$$$filter-aggregation-example$$$ + +```console +POST /sales/_search?size=0&filter_path=aggregations +{ + "aggs": { + "avg_price": { "avg": { "field": "price" } }, + "t_shirts": { + "filter": { "term": { "type": "t-shirt" } }, + "aggs": { + "avg_price": { "avg": { "field": "price" } } + } + } + } +} +``` + +The previous example calculates the average price of all sales as well as the average price of all T-shirt sales. + +Response: + +```console-result +{ + "aggregations": { + "avg_price": { "value": 140.71428571428572 }, + "t_shirts": { + "doc_count": 3, + "avg_price": { "value": 128.33333333333334 } + } + } +} +``` + +## Use a top-level `query` to limit all aggregations [use-top-level-query-to-limit-all-aggs] + +To limit the documents on which all aggregations in a search run, use a top-level `query`. This is faster than a single `filter` aggregation with sub-aggregations. + +For example, use this: + +$$$filter-aggregation-top-good$$$ + +```console +POST /sales/_search?size=0&filter_path=aggregations +{ + "query": { "term": { "type": "t-shirt" } }, + "aggs": { + "avg_price": { "avg": { "field": "price" } } + } +} +``` + +Instead of this: + +$$$filter-aggregation-top-bad$$$ + +```console +POST /sales/_search?size=0&filter_path=aggregations +{ + "aggs": { + "t_shirts": { + "filter": { "term": { "type": "t-shirt" } }, + "aggs": { + "avg_price": { "avg": { "field": "price" } } + } + } + } +} +``` + + +## Use the `filters` aggregation for multiple filters [use-filters-agg-for-multiple-filters] + +To group documents using multiple filters, use the [`filters` aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-filters-aggregation.md). This is faster than multiple `filter` aggregations. 
+ +For example, use this: + +$$$filter-aggregation-many-good$$$ + +```console +POST /sales/_search?size=0&filter_path=aggregations +{ + "aggs": { + "f": { + "filters": { + "filters": { + "hats": { "term": { "type": "hat" } }, + "t_shirts": { "term": { "type": "t-shirt" } } + } + }, + "aggs": { + "avg_price": { "avg": { "field": "price" } } + } + } + } +} +``` + +Instead of this: + +$$$filter-aggregation-many-bad$$$ + +```console +POST /sales/_search?size=0&filter_path=aggregations +{ + "aggs": { + "hats": { + "filter": { "term": { "type": "hat" } }, + "aggs": { + "avg_price": { "avg": { "field": "price" } } + } + }, + "t_shirts": { + "filter": { "term": { "type": "t-shirt" } }, + "aggs": { + "avg_price": { "avg": { "field": "price" } } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-filters-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-filters-aggregation.md new file mode 100644 index 0000000000000..03311dc5d8ac3 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-filters-aggregation.md @@ -0,0 +1,244 @@ +--- +navigation_title: "Filters" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-filters-aggregation.html +--- + +# Filters aggregation [search-aggregations-bucket-filters-aggregation] + + +A multi-bucket aggregation where each bucket contains the documents that match a [query](/reference/query-languages/querydsl.md). + +Example: + +$$$filters-aggregation-example$$$ + +```console +PUT /logs/_bulk?refresh +{ "index" : { "_id" : 1 } } +{ "body" : "warning: page could not be rendered" } +{ "index" : { "_id" : 2 } } +{ "body" : "authentication error" } +{ "index" : { "_id" : 3 } } +{ "body" : "warning: connection timed out" } + +GET logs/_search +{ + "size": 0, + "aggs" : { + "messages" : { + "filters" : { + "filters" : { + "errors" : { "match" : { "body" : "error" }}, + "warnings" : { "match" : { "body" : "warning" }} + } + } + } + } +} +``` + +In the above example, we analyze log messages. The aggregation will build two collection (buckets) of log messages - one for all those containing an error, and another for all those containing a warning. + +Response: + +```console-result +{ + "took": 9, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "messages": { + "buckets": { + "errors": { + "doc_count": 1 + }, + "warnings": { + "doc_count": 2 + } + } + } + } +} +``` + +## Anonymous filters [anonymous-filters] + +The filters field can also be provided as an array of filters, as in the following request: + +$$$filters-aggregation-anonymous-example$$$ + +```console +GET logs/_search +{ + "size": 0, + "aggs" : { + "messages" : { + "filters" : { + "filters" : [ + { "match" : { "body" : "error" }}, + { "match" : { "body" : "warning" }} + ] + } + } + } +} +``` + +The filtered buckets are returned in the same order as provided in the request. The response for this example would be: + +```console-result +{ + "took": 4, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "messages": { + "buckets": [ + { + "doc_count": 1 + }, + { + "doc_count": 2 + } + ] + } + } +} +``` + + +## `Other` Bucket [other-bucket] + +The `other_bucket` parameter can be set to add a bucket to the response which will contain all documents that do not match any of the given filters. 
The value of this parameter can be as follows: + +`false` +: Does not compute the `other` bucket + +`true` +: Returns the `other` bucket either in a bucket (named `_other_` by default) if named filters are being used, or as the last bucket if anonymous filters are being used + +The `other_bucket_key` parameter can be used to set the key for the `other` bucket to a value other than the default `_other_`. Setting this parameter will implicitly set the `other_bucket` parameter to `true`. + +The following snippet shows a response where the `other` bucket is requested to be named `other_messages`. + +$$$filters-aggregation-other-bucket-example$$$ + +```console +PUT logs/_doc/4?refresh +{ + "body": "info: user Bob logged out" +} + +GET logs/_search +{ + "size": 0, + "aggs" : { + "messages" : { + "filters" : { + "other_bucket_key": "other_messages", + "filters" : { + "errors" : { "match" : { "body" : "error" }}, + "warnings" : { "match" : { "body" : "warning" }} + } + } + } + } +} +``` + +The response would be something like the following: + +```console-result +{ + "took": 3, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "messages": { + "buckets": { + "errors": { + "doc_count": 1 + }, + "warnings": { + "doc_count": 2 + }, + "other_messages": { + "doc_count": 1 + } + } + } + } +} +``` + + +## Non-keyed Response [non-keyed-response] + +By default, the named filters aggregation returns the buckets as an object. But in some sorting cases, such as [bucket sort](/reference/data-analysis/aggregations/search-aggregations-pipeline-bucket-sort-aggregation.md), the JSON doesn’t guarantee the order of elements in the object. You can use the `keyed` parameter to specify the buckets as an array of objects. The value of this parameter can be as follows: + +`true` +: (Default) Returns the buckets as an object + +`false` +: Returns the buckets as an array of objects + +::::{note} +This parameter is ignored by [Anonymous filters](#anonymous-filters). +:::: + + +Example: + +$$$filters-aggregation-sortable-example$$$ + +```console +POST /sales/_search?size=0&filter_path=aggregations +{ + "aggs": { + "the_filter": { + "filters": { + "keyed": false, + "filters": { + "t-shirt": { "term": { "type": "t-shirt" } }, + "hat": { "term": { "type": "hat" } } + } + }, + "aggs": { + "avg_price": { "avg": { "field": "price" } }, + "sort_by_avg_price": { + "bucket_sort": { "sort": { "avg_price": "asc" } } + } + } + } + } +} +``` + +Response: + +```console-result +{ + "aggregations": { + "the_filter": { + "buckets": [ + { + "key": "t-shirt", + "doc_count": 3, + "avg_price": { "value": 128.33333333333334 } + }, + { + "key": "hat", + "doc_count": 3, + "avg_price": { "value": 150.0 } + } + ] + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-frequent-item-sets-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-frequent-item-sets-aggregation.md new file mode 100644 index 0000000000000..39e37850bc067 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-frequent-item-sets-aggregation.md @@ -0,0 +1,311 @@ +--- +navigation_title: "Frequent item sets" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-frequent-item-sets-aggregation.html +--- + +# Frequent item sets aggregation [search-aggregations-bucket-frequent-item-sets-aggregation] + + +A bucket aggregation which finds frequent item sets. 
It is a form of association rules mining that identifies items that often occur together. Items that are frequently purchased together or log events that tend to co-occur are examples of frequent item sets. Finding frequent item sets helps to discover relationships between different data points (items).
+
+The aggregation reports closed item sets. A frequent item set is called closed if no superset exists with the same ratio of documents (also known as its [support value](#frequent-item-sets-minimum-support)). For example, consider the following two candidates for a frequent item set, which have the same support value:
+
+1. `apple, orange, banana`
+2. `apple, orange, banana, tomato`
+
+Only the second item set (`apple, orange, banana, tomato`) is returned, and the first set – which is a subset of the second one – is skipped. Both item sets might be returned if their support values are different.
+
+The runtime of the aggregation depends on the data and the provided parameters. It might take a significant time for the aggregation to complete. For this reason, it is recommended to use [async search](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-async-search-submit) to run your requests asynchronously.
+
+## Syntax [_syntax_2]
+
+A `frequent_item_sets` aggregation looks like this in isolation:
+
+```js
+"frequent_item_sets": {
+  "minimum_set_size": 3,
+  "fields": [
+    {"field": "my_field_1"},
+    {"field": "my_field_2"}
+  ]
+}
+```
+
+| Parameter Name | Description | Required | Default Value |
+| --- | --- | --- | --- |
+| `fields` | (array) Fields to analyze. | Required |  |
+| `minimum_set_size` | (integer) The [minimum size](#frequent-item-sets-minimum-set-size) of one item set. | Optional | `1` |
+| `minimum_support` | (double) The [minimum support](#frequent-item-sets-minimum-support) of one item set. | Optional | `0.1` |
+| `size` | (integer) The number of top item sets to return. | Optional | `10` |
+| `filter` | (object) Query that filters documents from the analysis. | Optional | `match_all` |
+
+
+### Fields [frequent-item-sets-fields]
+
+Supported field types for the analyzed fields are keyword, numeric, ip, date, and arrays of these types. You can also add runtime fields to your analyzed fields.
+
+If the combined cardinality of the analyzed fields is high, the aggregation might require a significant amount of system resources.
+
+You can filter the values for each field by using the `include` and `exclude` parameters. The parameters can be regular expression strings or arrays of strings of exact terms. The filtered values are removed from the analysis and therefore reduce the runtime. If both `include` and `exclude` are defined, `exclude` takes precedence; it means `include` is evaluated first and then `exclude`.
+
+
+### Minimum set size [frequent-item-sets-minimum-set-size]
+
+The minimum set size is the minimum number of items the set needs to contain. A value of 1 returns the frequency of single items. Only item sets that contain at least the number of `minimum_set_size` items are returned. For example, the item set `orange, banana, apple` is returned only if the minimum set size is 3 or lower.
+
+
+### Minimum support [frequent-item-sets-minimum-support]
+
+The minimum support value is the ratio of documents that an item set must exist in to be considered "frequent". In particular, it is a normalized value between 0 and 1. It is calculated by dividing the number of documents containing the item set by the total number of documents.
+ +For example, if a given item set is contained by five documents and the total number of documents is 20, then the support of the item set is 5/20 = 0.25. Therefore, this set is returned only if the minimum support is 0.25 or lower. As a higher minimum support prunes more items, the calculation is less resource intensive. The `minimum_support` parameter has an effect on the required memory and the runtime of the aggregation. + + +### Size [frequent-item-sets-size] + +This parameter defines the maximum number of item sets to return. The result contains top-k item sets; the item sets with the highest support values. This parameter has a significant effect on the required memory and the runtime of the aggregation. + + +### Filter [frequent-item-sets-filter] + +A query to filter documents to use as part of the analysis. Documents that don’t match the filter are ignored when generating the item sets, however still count when calculating the support of an item set. + +Use the filter if you want to narrow the item set analysis to fields of interest. Use a top-level query to filter the data set. + + +### Examples [frequent-item-sets-example] + +In the following examples, we use the e-commerce {{kib}} sample data set. + + +### Aggregation with two analyzed fields and an `exclude` parameter [_aggregation_with_two_analyzed_fields_and_an_exclude_parameter] + +In the first example, the goal is to find out based on transaction data (1.) from what product categories the customers purchase products frequently together and (2.) from which cities they make those purchases. We want to exclude results where location information is not available (where the city name is `other`). Finally, we are interested in sets with three or more items, and want to see the first three frequent item sets with the highest support. + +Note that we use the [async search](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-async-search-submit) endpoint in this first example. + +```console +POST /kibana_sample_data_ecommerce/_async_search +{ + "size":0, + "aggs":{ + "my_agg":{ + "frequent_item_sets":{ + "minimum_set_size":3, + "fields":[ + { + "field":"category.keyword" + }, + { + "field":"geoip.city_name", + "exclude":"other" + } + ], + "size":3 + } + } + } +} +``` + +The response of the API call above contains an identifier (`id`) of the async search request. You can use the identifier to retrieve the search results: + +```console +GET /_async_search/ +``` + +The API returns a response similar to the following one: + +```console-result +(...) +"aggregations" : { + "my_agg" : { + "buckets" : [ <1> + { + "key" : { <2> + "category.keyword" : [ + "Women's Clothing", + "Women's Shoes" + ], + "geoip.city_name" : [ + "New York" + ] + }, + "doc_count" : 217, <3> + "support" : 0.04641711229946524 <4> + }, + { + "key" : { + "category.keyword" : [ + "Women's Clothing", + "Women's Accessories" + ], + "geoip.city_name" : [ + "New York" + ] + }, + "doc_count" : 135, + "support" : 0.028877005347593583 + }, + { + "key" : { + "category.keyword" : [ + "Men's Clothing", + "Men's Shoes" + ], + "geoip.city_name" : [ + "Cairo" + ] + }, + "doc_count" : 123, + "support" : 0.026310160427807486 + } + ], + (...) + } +} +``` + +1. The array of returned item sets. +2. The `key` object contains one item set. In this case, it consists of two values of the `category.keyword` field and one value of the `geoip.city_name`. +3. The number of documents that contain the item set. +4. The support value of the item set. 
It is calculated by dividing the number of documents containing the item set by the total number of documents.
+
+
+The response shows that the categories customers purchase from most frequently together are `Women's Clothing` and `Women's Shoes`, and that customers from New York tend to buy items from these categories together. In other words, customers who buy products labelled `Women's Clothing` are more likely to also buy products from the `Women's Shoes` category, and customers from New York most likely buy products from these categories together. The item set with the second highest support is `Women's Clothing` and `Women's Accessories`, with customers mostly from New York. Finally, the item set with the third highest support is `Men's Clothing` and `Men's Shoes`, with customers mostly from Cairo.
+
+
+### Aggregation with two analyzed fields and a filter [_aggregation_with_two_analyzed_fields_and_a_filter]
+
+We take the first example, but want to narrow the item sets to places in Europe. For that, we add a filter, and this time, we don’t use the `exclude` parameter:
+
+```console
+POST /kibana_sample_data_ecommerce/_async_search
+{
+  "size": 0,
+  "aggs": {
+    "my_agg": {
+      "frequent_item_sets": {
+        "minimum_set_size": 3,
+        "fields": [
+          { "field": "category.keyword" },
+          { "field": "geoip.city_name" }
+        ],
+        "size": 3,
+        "filter": {
+          "term": {
+            "geoip.continent_name": "Europe"
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+The result will only show item sets that are created from documents matching the filter, namely purchases in Europe. When you use `filter`, the calculated `support` still takes all purchases into account. That’s different from specifying a query at the top level, in which case `support` gets calculated only from purchases in Europe.
+
+
+### Analyzing numeric values by using a runtime field [_analyzing_numeric_values_by_using_a_runtime_field]
+
+The frequent items aggregation enables you to bucket numeric values by using [runtime fields](docs-content://manage-data/data-store/mapping/runtime-fields.md). The next example demonstrates how to use a script to add a runtime field to your documents called `price_range`, which is calculated from the taxful total price of the individual transactions. The runtime field can then be used in the frequent items aggregation as a field to analyze.
+
+```console
+GET kibana_sample_data_ecommerce/_search
+{
+  "runtime_mappings": {
+    "price_range": {
+      "type": "keyword",
+      "script": {
+        "source": """
+          def bucket_start = (long) Math.floor(doc['taxful_total_price'].value / 50) * 50;
+          def bucket_end = bucket_start + 50;
+          emit(bucket_start.toString() + "-" + bucket_end.toString());
+        """
+      }
+    }
+  },
+  "size": 0,
+  "aggs": {
+    "my_agg": {
+      "frequent_item_sets": {
+        "minimum_set_size": 4,
+        "fields": [
+          {
+            "field": "category.keyword"
+          },
+          {
+            "field": "price_range"
+          },
+          {
+            "field": "geoip.city_name"
+          }
+        ],
+        "size": 3
+      }
+    }
+  }
+}
+```
+
+The API returns a response similar to the following one:
+
+```console-result
+(...)
+"aggregations" : { + "my_agg" : { + "buckets" : [ + { + "key" : { + "category.keyword" : [ + "Women's Clothing", + "Women's Shoes" + ], + "price_range" : [ + "50-100" + ], + "geoip.city_name" : [ + "New York" + ] + }, + "doc_count" : 100, + "support" : 0.0213903743315508 + }, + { + "key" : { + "category.keyword" : [ + "Women's Clothing", + "Women's Shoes" + ], + "price_range" : [ + "50-100" + ], + "geoip.city_name" : [ + "Dubai" + ] + }, + "doc_count" : 59, + "support" : 0.012620320855614974 + }, + { + "key" : { + "category.keyword" : [ + "Men's Clothing", + "Men's Shoes" + ], + "price_range" : [ + "50-100" + ], + "geoip.city_name" : [ + "Marrakesh" + ] + }, + "doc_count" : 53, + "support" : 0.011336898395721925 + } + ], + (...) + } + } +``` + +The response shows the categories that customers purchase from most frequently together, the location of the customers who tend to buy items from these categories, and the most frequent price ranges of these purchases. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-geodistance-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-geodistance-aggregation.md new file mode 100644 index 0000000000000..f34919ec75909 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-geodistance-aggregation.md @@ -0,0 +1,245 @@ +--- +navigation_title: "Geo-distance" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-geodistance-aggregation.html +--- + +# Geo-distance aggregation [search-aggregations-bucket-geodistance-aggregation] + + +A multi-bucket aggregation that works on `geo_point` fields and conceptually works very similar to the [range](/reference/data-analysis/aggregations/search-aggregations-bucket-range-aggregation.md) aggregation. The user can define a point of origin and a set of distance range buckets. The aggregation evaluates the distance of each document value from the origin point and determines the buckets it belongs to based on the ranges (a document belongs to a bucket if the distance between the document and the origin falls within the distance range of the bucket). + +$$$geodistance-aggregation-example$$$ + +```console +PUT /museums +{ + "mappings": { + "properties": { + "location": { + "type": "geo_point" + } + } + } +} + +POST /museums/_bulk?refresh +{"index":{"_id":1}} +{"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} +{"index":{"_id":2}} +{"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} +{"index":{"_id":3}} +{"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} +{"index":{"_id":4}} +{"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} +{"index":{"_id":5}} +{"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} +{"index":{"_id":6}} +{"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} + +POST /museums/_search?size=0 +{ + "aggs": { + "rings_around_amsterdam": { + "geo_distance": { + "field": "location", + "origin": "POINT (4.894 52.3760)", + "ranges": [ + { "to": 100000 }, + { "from": 100000, "to": 300000 }, + { "from": 300000 } + ] + } + } + } +} +``` + +Response: + +```console-result +{ + ... 
+ "aggregations": { + "rings_around_amsterdam": { + "buckets": [ + { + "key": "*-100000.0", + "from": 0.0, + "to": 100000.0, + "doc_count": 3 + }, + { + "key": "100000.0-300000.0", + "from": 100000.0, + "to": 300000.0, + "doc_count": 1 + }, + { + "key": "300000.0-*", + "from": 300000.0, + "doc_count": 2 + } + ] + } + } +} +``` + +The specified field must be of type `geo_point` (which can only be set explicitly in the mappings). And it can also hold an array of `geo_point` fields, in which case all will be taken into account during aggregation. The origin point can accept all formats supported by the [`geo_point` type](/reference/elasticsearch/mapping-reference/geo-point.md): + +* Object format: `{ "lat" : 52.3760, "lon" : 4.894 }` - this is the safest format as it is the most explicit about the `lat` & `lon` values +* String format: `"52.3760, 4.894"` - where the first number is the `lat` and the second is the `lon` +* Array format: `[4.894, 52.3760]` - which is based on the GeoJSON standard where the first number is the `lon` and the second one is the `lat` + +By default, the distance unit is `m` (meters) but it can also accept: `mi` (miles), `in` (inches), `yd` (yards), `km` (kilometers), `cm` (centimeters), `mm` (millimeters). + +```console +POST /museums/_search?size=0 +{ + "aggs": { + "rings": { + "geo_distance": { + "field": "location", + "origin": "POINT (4.894 52.3760)", + "unit": "km", <1> + "ranges": [ + { "to": 100 }, + { "from": 100, "to": 300 }, + { "from": 300 } + ] + } + } + } +} +``` + +1. The distances will be computed in kilometers + + +There are two distance calculation modes: `arc` (the default), and `plane`. The `arc` calculation is the most accurate. The `plane` is the fastest but least accurate. Consider using `plane` when your search context is "narrow", and spans smaller geographical areas (~5km). `plane` will return higher error margins for searches across very large areas (e.g. cross continent search). The distance calculation type can be set using the `distance_type` parameter: + +```console +POST /museums/_search?size=0 +{ + "aggs": { + "rings": { + "geo_distance": { + "field": "location", + "origin": "POINT (4.894 52.3760)", + "unit": "km", + "distance_type": "plane", + "ranges": [ + { "to": 100 }, + { "from": 100, "to": 300 }, + { "from": 300 } + ] + } + } + } +} +``` + +## Keyed Response [_keyed_response_2] + +Setting the `keyed` flag to `true` will associate a unique string key with each bucket and return the ranges as a hash rather than an array: + +```console +POST /museums/_search?size=0 +{ + "aggs": { + "rings_around_amsterdam": { + "geo_distance": { + "field": "location", + "origin": "POINT (4.894 52.3760)", + "ranges": [ + { "to": 100000 }, + { "from": 100000, "to": 300000 }, + { "from": 300000 } + ], + "keyed": true + } + } + } +} +``` + +Response: + +```console-result +{ + ... 
+ "aggregations": { + "rings_around_amsterdam": { + "buckets": { + "*-100000.0": { + "from": 0.0, + "to": 100000.0, + "doc_count": 3 + }, + "100000.0-300000.0": { + "from": 100000.0, + "to": 300000.0, + "doc_count": 1 + }, + "300000.0-*": { + "from": 300000.0, + "doc_count": 2 + } + } + } + } +} +``` + +It is also possible to customize the key for each range: + +```console +POST /museums/_search?size=0 +{ + "aggs": { + "rings_around_amsterdam": { + "geo_distance": { + "field": "location", + "origin": "POINT (4.894 52.3760)", + "ranges": [ + { "to": 100000, "key": "first_ring" }, + { "from": 100000, "to": 300000, "key": "second_ring" }, + { "from": 300000, "key": "third_ring" } + ], + "keyed": true + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "rings_around_amsterdam": { + "buckets": { + "first_ring": { + "from": 0.0, + "to": 100000.0, + "doc_count": 3 + }, + "second_ring": { + "from": 100000.0, + "to": 300000.0, + "doc_count": 1 + }, + "third_ring": { + "from": 300000.0, + "doc_count": 2 + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-geohashgrid-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-geohashgrid-aggregation.md new file mode 100644 index 0000000000000..10b3f689e6a6e --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-geohashgrid-aggregation.md @@ -0,0 +1,309 @@ +--- +navigation_title: "Geohash grid" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-geohashgrid-aggregation.html +--- + +# Geohash grid aggregation [search-aggregations-bucket-geohashgrid-aggregation] + + +A multi-bucket aggregation that groups [`geo_point`](/reference/elasticsearch/mapping-reference/geo-point.md) and [`geo_shape`](/reference/elasticsearch/mapping-reference/geo-shape.md) values into buckets that represent a grid. The resulting grid can be sparse and only contains cells that have matching data. Each cell is labeled using a [geohash](https://en.wikipedia.org/wiki/Geohash) which is of user-definable precision. + +* High precision geohashes have a long string length and represent cells that cover only a small area. +* Low precision geohashes have a short string length and represent cells that each cover a large area. + +Geohashes used in this aggregation can have a choice of precision between 1 and 12. + +::::{warning} +The highest-precision geohash of length 12 produces cells that cover less than a square metre of land and so high-precision requests can be very costly in terms of RAM and result sizes. Please see the example below on how to first filter the aggregation to a smaller geographic area before requesting high-levels of detail. +:::: + + +You can only use `geohash_grid` to aggregate an explicitly mapped `geo_point` or `geo_shape` field. If the `geo_point` field contains an array, `geohash_grid` aggregates all the array values. 
+ +## Simple low-precision request [_simple_low_precision_request] + +$$$geohashgrid-aggregation-low-precision-example$$$ + +```console +PUT /museums +{ + "mappings": { + "properties": { + "location": { + "type": "geo_point" + } + } + } +} + +POST /museums/_bulk?refresh +{"index":{"_id":1}} +{"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} +{"index":{"_id":2}} +{"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} +{"index":{"_id":3}} +{"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} +{"index":{"_id":4}} +{"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} +{"index":{"_id":5}} +{"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} +{"index":{"_id":6}} +{"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} + +POST /museums/_search?size=0 +{ + "aggregations": { + "large-grid": { + "geohash_grid": { + "field": "location", + "precision": 3 + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "large-grid": { + "buckets": [ + { + "key": "u17", + "doc_count": 3 + }, + { + "key": "u09", + "doc_count": 2 + }, + { + "key": "u15", + "doc_count": 1 + } + ] + } +} +} +``` + + +## High-precision requests [_high_precision_requests] + +When requesting detailed buckets (typically for displaying a "zoomed in" map) a filter like [geo_bounding_box](/reference/query-languages/query-dsl-geo-bounding-box-query.md) should be applied to narrow the subject area otherwise potentially millions of buckets will be created and returned. + +$$$geohashgrid-aggregation-high-precision-example$$$ + +```console +POST /museums/_search?size=0 +{ + "aggregations": { + "zoomed-in": { + "filter": { + "geo_bounding_box": { + "location": { + "top_left": "POINT (4.9 52.4)", + "bottom_right": "POINT (5.0 52.3)" + } + } + }, + "aggregations": { + "zoom1": { + "geohash_grid": { + "field": "location", + "precision": 8 + } + } + } + } + } +} +``` + +The geohashes returned by the `geohash_grid` aggregation can be also used for zooming in. To zoom into the first geohash `u17` returned in the previous example, it should be specified as both `top_left` and `bottom_right` corner: + +```console +POST /museums/_search?size=0 +{ + "aggregations": { + "zoomed-in": { + "filter": { + "geo_bounding_box": { + "location": { + "top_left": "u17", + "bottom_right": "u17" + } + } + }, + "aggregations": { + "zoom1": { + "geohash_grid": { + "field": "location", + "precision": 8 + } + } + } + } + } +} +``` + +```console-result +{ + ... + "aggregations": { + "zoomed-in": { + "doc_count": 3, + "zoom1": { + "buckets": [ + { + "key": "u173zy3j", + "doc_count": 1 + }, + { + "key": "u173zvfz", + "doc_count": 1 + }, + { + "key": "u173zt90", + "doc_count": 1 + } + ] + } + } + } +} +``` + +For "zooming in" on the system that don’t support geohashes, the bucket keys should be translated into bounding boxes using one of available geohash libraries. For example, for javascript the [node-geohash](https://github.com/sunng87/node-geohash) library can be used: + +```js +var geohash = require('ngeohash'); + +// bbox will contain [ 52.03125, 4.21875, 53.4375, 5.625 ] +// [ minlat, minlon, maxlat, maxlon] +var bbox = geohash.decode_bbox('u17'); +``` + + +## Requests with additional bounding box filtering [_requests_with_additional_bounding_box_filtering] + +The `geohash_grid` aggregation supports an optional `bounds` parameter that restricts the cells considered to those that intersects the bounds provided. 
The `bounds` parameter accepts the bounding box in all the same [accepted formats](/reference/query-languages/query-dsl-geo-bounding-box-query.md#query-dsl-geo-bounding-box-query-accepted-formats) of the bounds specified in the Geo Bounding Box Query. This bounding box can be used with or without an additional `geo_bounding_box` query filtering the points prior to aggregating. It is an independent bounding box that can intersect with, be equal to, or be disjoint to any additional `geo_bounding_box` queries defined in the context of the aggregation. + +$$$geohashgrid-aggregation-with-bounds$$$ + +```console +POST /museums/_search?size=0 +{ + "aggregations": { + "tiles-in-bounds": { + "geohash_grid": { + "field": "location", + "precision": 8, + "bounds": { + "top_left": "POINT (4.21875 53.4375)", + "bottom_right": "POINT (5.625 52.03125)" + } + } + } + } +} +``` + +```console-result +{ + ... + "aggregations": { + "tiles-in-bounds": { + "buckets": [ + { + "key": "u173zy3j", + "doc_count": 1 + }, + { + "key": "u173zvfz", + "doc_count": 1 + }, + { + "key": "u173zt90", + "doc_count": 1 + } + ] + } + } +} +``` + + +## Cell dimensions at the equator [_cell_dimensions_at_the_equator] + +The table below shows the metric dimensions for cells covered by various string lengths of geohash. Cell dimensions vary with latitude and so the table is for the worst-case scenario at the equator. + +**GeoHash length** +: **Area width x height** + +1 +: 5,009.4km x 4,992.6km + +2 +: 1,252.3km x 624.1km + +3 +: 156.5km x 156km + +4 +: 39.1km x 19.5km + +5 +: 4.9km x 4.9km + +6 +: 1.2km x 609.4m + +7 +: 152.9m x 152.4m + +8 +: 38.2m x 19m + +9 +: 4.8m x 4.8m + +10 +: 1.2m x 59.5cm + +11 +: 14.9cm x 14.9cm + +12 +: 3.7cm x 1.9cm + + +### Aggregating `geo_shape` fields [_aggregating_geo_shape_fields] + +Aggregating on [Geoshape](/reference/elasticsearch/mapping-reference/geo-shape.md) fields works just as it does for points, except that a single shape can be counted for in multiple tiles. A shape will contribute to the count of matching values if any part of its shape intersects with that tile. Below is an image that demonstrates this: + +![geoshape grid](../../../images/geoshape_grid.png "") + + +## Options [_options_3] + +field +: Mandatory. Field containing indexed geo-point or geo-shape values. Must be explicitly mapped as a [`geo_point`](/reference/elasticsearch/mapping-reference/geo-point.md) or a [`geo_shape`](/reference/elasticsearch/mapping-reference/geo-shape.md) field. If the field contains an array, `geohash_grid` aggregates all array values. + +precision +: Optional. The string length of the geohashes used to define cells/buckets in the results. Defaults to 5. The precision can either be defined in terms of the integer precision levels mentioned above. Values outside of [1,12] will be rejected. Alternatively, the precision level can be approximated from a distance measure like "1km", "10m". The precision level is calculate such that cells will not exceed the specified size (diagonal) of the required precision. When this would lead to precision levels higher than the supported 12 levels, (e.g. for distances <5.6cm) the value is rejected. + +bounds +: Optional. The bounding box to filter the points in the bucket. + +size +: Optional. The maximum number of geohash buckets to return (defaults to 10,000). When results are trimmed, buckets are prioritised based on the volumes of documents they contain. + +shard_size +: Optional. 
To allow for more accurate counting of the top cells returned in the final result the aggregation defaults to returning `max(10,(size x number-of-shards))` buckets from each shard. If this heuristic is undesirable, the number considered from each shard can be over-ridden using this parameter. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-geohexgrid-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-geohexgrid-aggregation.md new file mode 100644 index 0000000000000..1e53ffc901ec1 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-geohexgrid-aggregation.md @@ -0,0 +1,229 @@ +--- +navigation_title: "Geohex grid" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-geohexgrid-aggregation.html +--- + +# Geohex grid aggregation [search-aggregations-bucket-geohexgrid-aggregation] + + +A multi-bucket aggregation that groups [`geo_point`](/reference/elasticsearch/mapping-reference/geo-point.md) and [`geo_shape`](/reference/elasticsearch/mapping-reference/geo-shape.md) values into buckets that represent a grid. The resulting grid can be sparse and only contains cells that have matching data. Each cell corresponds to a [H3 cell index](https://h3geo.org/docs/core-library/h3Indexing#h3-cell-indexp) and is labeled using the [H3Index representation](https://h3geo.org/docs/core-library/h3Indexing#h3index-representation). + +See [the table of cell areas for H3 resolutions](https://h3geo.org/docs/core-library/restable) on how precision (zoom) correlates to size on the ground. Precision for this aggregation can be between 0 and 15, inclusive. + +::::{warning} +High-precision requests can be very expensive in terms of RAM and result sizes. For example, the highest-precision geohex with a precision of 15 produces cells that cover less than one square meter. We recommend you use a filter to limit high-precision requests to a smaller geographic area. For an example, refer to [High-precision requests](#geohexgrid-high-precision). +:::: + + +## Simple low-precision request [geohexgrid-low-precision] + +$$$geohexgrid-aggregation-example$$$ + +```console +PUT /museums +{ + "mappings": { + "properties": { + "location": { + "type": "geo_point" + } + } + } +} + +POST /museums/_bulk?refresh +{"index":{"_id":1}} +{"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} +{"index":{"_id":2}} +{"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} +{"index":{"_id":3}} +{"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} +{"index":{"_id":4}} +{"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} +{"index":{"_id":5}} +{"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} +{"index":{"_id":6}} +{"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} + +POST /museums/_search?size=0 +{ + "aggregations": { + "large-grid": { + "geohex_grid": { + "field": "location", + "precision": 4 + } + } + } +} +``` + +Response: + +```console-result +{ + ... 
+ "aggregations": { + "large-grid": { + "buckets": [ + { + "key": "841969dffffffff", + "doc_count": 3 + }, + { + "key": "841fb47ffffffff", + "doc_count": 2 + }, + { + "key": "841fa4dffffffff", + "doc_count": 1 + } + ] + } + } +} +``` + + +## High-precision requests [geohexgrid-high-precision] + +When requesting detailed buckets (typically for displaying a "zoomed in" map), a filter like [geo_bounding_box](/reference/query-languages/query-dsl-geo-bounding-box-query.md) should be applied to narrow the subject area. Otherwise, potentially millions of buckets will be created and returned. + +$$$geohexgrid-high-precision-ex$$$ + +```console +POST /museums/_search?size=0 +{ + "aggregations": { + "zoomed-in": { + "filter": { + "geo_bounding_box": { + "location": { + "top_left": "POINT (4.9 52.4)", + "bottom_right": "POINT (5.0 52.3)" + } + } + }, + "aggregations": { + "zoom1": { + "geohex_grid": { + "field": "location", + "precision": 12 + } + } + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "zoomed-in": { + "doc_count": 3, + "zoom1": { + "buckets": [ + { + "key": "8c1969c9b2617ff", + "doc_count": 1 + }, + { + "key": "8c1969526d753ff", + "doc_count": 1 + }, + { + "key": "8c1969526d26dff", + "doc_count": 1 + } + ] + } + } + } +} +``` + + +## Requests with additional bounding box filtering [geohexgrid-addtl-bounding-box-filtering] + +The `geohex_grid` aggregation supports an optional `bounds` parameter that restricts the cells considered to those that intersect the provided bounds. The `bounds` parameter accepts the same [bounding box formats](/reference/query-languages/query-dsl-geo-bounding-box-query.md#query-dsl-geo-bounding-box-query-accepted-formats) as the geo-bounding box query. This bounding box can be used with or without an additional `geo_bounding_box` query for filtering the points prior to aggregating. It is an independent bounding box that can intersect with, be equal to, or be disjoint to any additional `geo_bounding_box` queries defined in the context of the aggregation. + +$$$geohexgrid-aggregation-with-bounds$$$ + +```console +POST /museums/_search?size=0 +{ + "aggregations": { + "tiles-in-bounds": { + "geohex_grid": { + "field": "location", + "precision": 12, + "bounds": { + "top_left": "POINT (4.9 52.4)", + "bottom_right": "POINT (5.0 52.3)" + } + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "tiles-in-bounds": { + "buckets": [ + { + "key": "8c1969c9b2617ff", + "doc_count": 1 + }, + { + "key": "8c1969526d753ff", + "doc_count": 1 + }, + { + "key": "8c1969526d26dff", + "doc_count": 1 + } + ] + } + } +} +``` + + +### Aggregating `geo_shape` fields [geohexgrid-aggregating-geo-shape] + +Aggregating on [Geoshape](/reference/elasticsearch/mapping-reference/geo-shape.md) fields works almost as it does for points. There are two key differences: + +* When aggregating over `geo_point` data, points are considered within a hexagonal tile if they lie within the edges defined by great circles. In other words the calculation is done using spherical coordinates. However, when aggregating over `geo_shape` data, the shapes are considered within a hexagon if they lie within the edges defined as straight lines on an equirectangular projection. The reason is that Elasticsearch and Lucene treat edges using the equirectangular projection at index and search time. In order to ensure that search results and aggregation results are aligned, we therefore also use equirectangular projection in aggregations. 
For most data, the difference is subtle or not noticed. However, for low zoom levels (low precision), especially far from the equator, this can be noticeable. For example, if the same point data is indexed as `geo_point` and `geo_shape`, it is possible to get different results when aggregating at lower resolutions. +* As is the case with [`geotile_grid`](/reference/data-analysis/aggregations/search-aggregations-bucket-geotilegrid-aggregation.md#geotilegrid-aggregating-geo-shape), a single shape can be counted for in multiple tiles. A shape will contribute to the count of matching values if any part of its shape intersects with that tile. Below is an image that demonstrates this: + +![geoshape hexgrid](../../../images/geoshape_hexgrid.png "") + + +## Options [_options_4] + +field +: (Required, string) Field containing indexed geo-point or geo-shape values. Must be explicitly mapped as a [`geo_point`](/reference/elasticsearch/mapping-reference/geo-point.md) or a [`geo_shape`](/reference/elasticsearch/mapping-reference/geo-shape.md) field. If the field contains an array, `geohex_grid` aggregates all array values. + +precision +: (Optional, integer) Integer zoom of the key used to define cells/buckets in the results. Defaults to `6`. Values outside of [`0`,`15`] will be rejected. + +bounds +: (Optional, object) Bounding box used to filter the geo-points or geo-shapes in each bucket. Accepts the same bounding box formats as the [geo-bounding box query](/reference/query-languages/query-dsl-geo-bounding-box-query.md#query-dsl-geo-bounding-box-query-accepted-formats). + +size +: (Optional, integer) Maximum number of buckets to return. Defaults to 10,000. When results are trimmed, buckets are prioritized based on the volume of documents they contain. + +shard_size +: (Optional, integer) Number of buckets returned from each shard. Defaults to `max(10,(size x number-of-shards))` to allow for a more accurate count of the top cells in the final result. Since each shard could have a different top result order, using a larger number here reduces the risk of inaccurate counts, but incurs a performance cost. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-geotilegrid-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-geotilegrid-aggregation.md new file mode 100644 index 0000000000000..944f4ab40ef53 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-geotilegrid-aggregation.md @@ -0,0 +1,231 @@ +--- +navigation_title: "Geotile grid" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-geotilegrid-aggregation.html +--- + +# Geotile grid aggregation [search-aggregations-bucket-geotilegrid-aggregation] + + +A multi-bucket aggregation that groups [`geo_point`](/reference/elasticsearch/mapping-reference/geo-point.md) and [`geo_shape`](/reference/elasticsearch/mapping-reference/geo-shape.md) values into buckets that represent a grid. The resulting grid can be sparse and only contains cells that have matching data. Each cell corresponds to a [map tile](https://en.wikipedia.org/wiki/Tiled_web_map) as used by many online map sites. Each cell is labeled using a `"{{zoom}}/{x}/{{y}}"` format, where zoom is equal to the user-specified precision. + +* High precision keys have a larger range for x and y, and represent tiles that cover only a small area. +* Low precision keys have a smaller range for x and y, and represent tiles that each cover a large area. 
+ +See [zoom level documentation](https://wiki.openstreetmap.org/wiki/Zoom_levels) on how precision (zoom) correlates to size on the ground. Precision for this aggregation can be between 0 and 29, inclusive. + +::::{warning} +The highest-precision geotile of length 29 produces cells that cover less than a 10cm by 10cm of land and so high-precision requests can be very costly in terms of RAM and result sizes. Please see the example below on how to first filter the aggregation to a smaller geographic area before requesting high-levels of detail. +:::: + + +You can only use `geotile_grid` to aggregate an explicitly mapped `geo_point` or `geo_shape` field. If the `geo_point` field contains an array, `geotile_grid` aggregates all the array values. + +## Simple low-precision request [_simple_low_precision_request_2] + +$$$geotilegrid-aggregation-example$$$ + +```console +PUT /museums +{ + "mappings": { + "properties": { + "location": { + "type": "geo_point" + } + } + } +} + +POST /museums/_bulk?refresh +{"index":{"_id":1}} +{"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} +{"index":{"_id":2}} +{"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} +{"index":{"_id":3}} +{"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} +{"index":{"_id":4}} +{"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} +{"index":{"_id":5}} +{"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} +{"index":{"_id":6}} +{"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} + +POST /museums/_search?size=0 +{ + "aggregations": { + "large-grid": { + "geotile_grid": { + "field": "location", + "precision": 8 + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "large-grid": { + "buckets": [ + { + "key": "8/131/84", + "doc_count": 3 + }, + { + "key": "8/129/88", + "doc_count": 2 + }, + { + "key": "8/131/85", + "doc_count": 1 + } + ] + } + } +} +``` + + +## High-precision requests [geotilegrid-high-precision] + +When requesting detailed buckets (typically for displaying a "zoomed in" map), a filter like [geo_bounding_box](/reference/query-languages/query-dsl-geo-bounding-box-query.md) should be applied to narrow the subject area. Otherwise, potentially millions of buckets will be created and returned. + +$$$geotilegrid-high-precision-ex$$$ + +```console +POST /museums/_search?size=0 +{ + "aggregations": { + "zoomed-in": { + "filter": { + "geo_bounding_box": { + "location": { + "top_left": "POINT (4.9 52.4)", + "bottom_right": "POINT (5.0 52.3)" + } + } + }, + "aggregations": { + "zoom1": { + "geotile_grid": { + "field": "location", + "precision": 22 + } + } + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "zoomed-in": { + "doc_count": 3, + "zoom1": { + "buckets": [ + { + "key": "22/2154412/1378379", + "doc_count": 1 + }, + { + "key": "22/2154385/1378332", + "doc_count": 1 + }, + { + "key": "22/2154259/1378425", + "doc_count": 1 + } + ] + } + } + } +} +``` + + +## Requests with additional bounding box filtering [geotilegrid-addtl-bounding-box-filtering] + +The `geotile_grid` aggregation supports an optional `bounds` parameter that restricts the cells considered to those that intersect the provided bounds. The `bounds` parameter accepts the same [bounding box formats](/reference/query-languages/query-dsl-geo-bounding-box-query.md#query-dsl-geo-bounding-box-query-accepted-formats) as the geo-bounding box query. 
This bounding box can be used with or without an additional `geo_bounding_box` query for filtering the points prior to aggregating. It is an independent bounding box that can intersect with, be equal to, or be disjoint to any additional `geo_bounding_box` queries defined in the context of the aggregation. + +$$$geotilegrid-aggregation-with-bounds$$$ + +```console +POST /museums/_search?size=0 +{ + "aggregations": { + "tiles-in-bounds": { + "geotile_grid": { + "field": "location", + "precision": 22, + "bounds": { + "top_left": "POINT (4.9 52.4)", + "bottom_right": "POINT (5.0 52.3)" + } + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "tiles-in-bounds": { + "buckets": [ + { + "key": "22/2154412/1378379", + "doc_count": 1 + }, + { + "key": "22/2154385/1378332", + "doc_count": 1 + }, + { + "key": "22/2154259/1378425", + "doc_count": 1 + } + ] + } + } +} +``` + + +### Aggregating `geo_shape` fields [geotilegrid-aggregating-geo-shape] + +Aggregating on [Geoshape](/reference/elasticsearch/mapping-reference/geo-shape.md) fields works almost as it does for points, except that a single shape can be counted for in multiple tiles. A shape will contribute to the count of matching values if any part of its shape intersects with that tile. Below is an image that demonstrates this: + +![geoshape grid](../../../images/geoshape_grid.png "") + + +## Options [_options_5] + +field +: (Required, string) Field containing indexed geo-point or geo-shape values. Must be explicitly mapped as a [`geo_point`](/reference/elasticsearch/mapping-reference/geo-point.md) or a [`geo_shape`](/reference/elasticsearch/mapping-reference/geo-shape.md) field. If the field contains an array, `geotile_grid` aggregates all array values. + +precision +: (Optional, integer) Integer zoom of the key used to define cells/buckets in the results. Defaults to `7`. Values outside of [`0`,`29`] will be rejected. + +bounds +: (Optional, object) Bounding box used to filter the geo-points or geo-shapes in each bucket. Accepts the same bounding box formats as the [geo-bounding box query](/reference/query-languages/query-dsl-geo-bounding-box-query.md#query-dsl-geo-bounding-box-query-accepted-formats). + +size +: (Optional, integer) Maximum number of buckets to return. Defaults to 10,000. When results are trimmed, buckets are prioritized based on the volume of documents they contain. + +shard_size +: (Optional, integer) Number of buckets returned from each shard. Defaults to `max(10,(size x number-of-shards))` to allow for a more accurate count of the top cells in the final result. Since each shard could have a different top result order, using a larger number here reduces the risk of inaccurate counts, but incurs a performance cost. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-global-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-global-aggregation.md new file mode 100644 index 0000000000000..85ce812ea0502 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-global-aggregation.md @@ -0,0 +1,68 @@ +--- +navigation_title: "Global" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-global-aggregation.html +--- + +# Global aggregation [search-aggregations-bucket-global-aggregation] + + +Defines a single bucket of all the documents within the search execution context. 
This context is defined by the indices and the document types you’re searching on, but is **not** influenced by the search query itself. + +::::{note} +Global aggregators can only be placed as top level aggregators because it doesn’t make sense to embed a global aggregator within another bucket aggregator. +:::: + + +Example: + +$$$global-aggregation-example$$$ + +```console +POST /sales/_search?size=0 +{ + "query": { + "match": { "type": "t-shirt" } + }, + "aggs": { + "all_products": { + "global": {}, <1> + "aggs": { <2> + "avg_price": { "avg": { "field": "price" } } + } + }, + "t_shirts": { "avg": { "field": "price" } } + } +} +``` + +1. The `global` aggregation has an empty body +2. The sub-aggregations that are registered for this `global` aggregation + + +The above aggregation demonstrates how one would compute aggregations (`avg_price` in this example) on all the documents in the search context, regardless of the query (in our example, it will compute the average price over all products in our catalog, not just on the "shirts"). + +The response for the above aggregation: + +```console-result +{ + ... + "aggregations": { + "all_products": { + "doc_count": 7, <1> + "avg_price": { + "value": 140.71428571428572 <2> + } + }, + "t_shirts": { + "value": 128.33333333333334 <3> + } + } +} +``` + +1. The number of documents that were aggregated (in our case, all documents within the search context) +2. The average price of all products in the index +3. The average price of all t-shirts + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-histogram-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-histogram-aggregation.md new file mode 100644 index 0000000000000..198ab127e783b --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-histogram-aggregation.md @@ -0,0 +1,379 @@ +--- +navigation_title: "Histogram" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-histogram-aggregation.html +--- + +# Histogram aggregation [search-aggregations-bucket-histogram-aggregation] + + +A multi-bucket values source based aggregation that can be applied on numeric values or numeric range values extracted from the documents. It dynamically builds fixed size (a.k.a. interval) buckets over the values. For example, if the documents have a field that holds a price (numeric), we can configure this aggregation to dynamically build buckets with interval `5` (in case of price it may represent $5). When the aggregation executes, the price field of every document will be evaluated and will be rounded down to its closest bucket - for example, if the price is `32` and the bucket size is `5` then the rounding will yield `30` and thus the document will "fall" into the bucket that is associated with the key `30`. To make this more formal, here is the rounding function that is used: + +```java +bucket_key = Math.floor((value - offset) / interval) * interval + offset +``` + +For range values, a document can fall into multiple buckets. The first bucket is computed from the lower bound of the range in the same way as a bucket for a single value is computed. The final bucket is computed in the same way from the upper bound of the range, and the range is counted in all buckets in between and including those two. 
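+
+As a minimal sketch of that multi-bucket behavior, assuming a hypothetical index with an `integer_range` field (the `range_example` index and `duration_range` field names here are purely illustrative and are not used elsewhere on this page):
+
+```console
+PUT range_example
+{
+  "mappings": {
+    "properties": {
+      "duration_range": { "type": "integer_range" }
+    }
+  }
+}
+
+PUT range_example/_doc/1?refresh
+{
+  "duration_range": { "gte": 12, "lte": 28 }
+}
+
+POST /range_example/_search?size=0
+{
+  "aggs": {
+    "durations": {
+      "histogram": {
+        "field": "duration_range",
+        "interval": 10
+      }
+    }
+  }
+}
+```
+
+With an interval of `10`, the lower bound `12` maps to the bucket keyed `10` and the upper bound `28` maps to the bucket keyed `20`, so this single document is counted in both of those buckets.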
+ +The `interval` must be a positive decimal, while the `offset` must be a decimal in `[0, interval)` (a decimal greater than or equal to `0` and less than `interval`). + +The following snippet "buckets" the products based on their `price` by an interval of `50`: + +$$$histogram-aggregation-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "prices": { + "histogram": { + "field": "price", + "interval": 50 + } + } + } +} +``` + +And the following may be the response: + +```console-result +{ + ... + "aggregations": { + "prices": { + "buckets": [ + { + "key": 0.0, + "doc_count": 1 + }, + { + "key": 50.0, + "doc_count": 1 + }, + { + "key": 100.0, + "doc_count": 0 + }, + { + "key": 150.0, + "doc_count": 2 + }, + { + "key": 200.0, + "doc_count": 3 + } + ] + } + } +} +``` + +## Minimum document count [_minimum_document_count] + +The response above shows that no documents have a price that falls within the range of `[100, 150)`. By default, the response will fill gaps in the histogram with empty buckets. It is possible to change that and request buckets with a higher minimum count thanks to the `min_doc_count` setting: + +$$$histogram-aggregation-min-doc-count-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "prices": { + "histogram": { + "field": "price", + "interval": 50, + "min_doc_count": 1 + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "prices": { + "buckets": [ + { + "key": 0.0, + "doc_count": 1 + }, + { + "key": 50.0, + "doc_count": 1 + }, + { + "key": 150.0, + "doc_count": 2 + }, + { + "key": 200.0, + "doc_count": 3 + } + ] + } + } +} +``` + +$$$search-aggregations-bucket-histogram-aggregation-extended-bounds$$$ +By default the `histogram` returns all the buckets within the range of the data itself, that is, the documents with the smallest values (on which the histogram runs) will determine the min bucket (the bucket with the smallest key) and the documents with the highest values will determine the max bucket (the bucket with the highest key). Often, when requesting empty buckets, this causes confusion, specifically when the data is also filtered. + +To understand why, let’s look at an example: + +Let’s say you’re filtering your request to get all docs with values between `0` and `500`, and in addition you’d like to slice the data per price using a histogram with an interval of `50`. You also specify `"min_doc_count" : 0` as you’d like to get all buckets, even the empty ones. If it happens that all products (documents) have prices higher than `100`, the first bucket you’ll get will be the one with `100` as its key. This is confusing, as often you’d also like to get those buckets between `0 - 100`. + +With the `extended_bounds` setting, you can now "force" the histogram aggregation to start building buckets on a specific `min` value and also keep on building buckets up to a `max` value (even if there are no documents anymore). Using `extended_bounds` only makes sense when `min_doc_count` is 0 (the empty buckets will never be returned if `min_doc_count` is greater than 0). + +Note that (as the name suggests) `extended_bounds` is **not** filtering buckets. This means that if `extended_bounds.min` is higher than the values extracted from the documents, the documents will still dictate what the first bucket will be (and the same goes for `extended_bounds.max` and the last bucket). For filtering buckets, one should nest the histogram aggregation under a range `filter` aggregation with the appropriate `from`/`to` settings. 
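+
+For instance, here is a rough sketch of that filtering approach, reusing the `sales` index and `price` field from the examples above (the aggregation names are illustrative):
+
+```console
+POST /sales/_search?size=0
+{
+  "aggs": {
+    "prices_100_to_200": {
+      "filter": { "range": { "price": { "gte": 100, "lt": 200 } } },
+      "aggs": {
+        "prices": {
+          "histogram": { "field": "price", "interval": 50 }
+        }
+      }
+    }
+  }
+}
+```
+
+Only documents whose `price` falls in `[100, 200)` reach the inner histogram, so no buckets outside those bounds are produced.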
+ +Example: + +$$$histogram-aggregation-extended-bounds-example$$$ + +```console +POST /sales/_search?size=0 +{ + "query": { + "constant_score": { "filter": { "range": { "price": { "lte": "500" } } } } + }, + "aggs": { + "prices": { + "histogram": { + "field": "price", + "interval": 50, + "extended_bounds": { + "min": 0, + "max": 500 + } + } + } + } +} +``` + +When aggregating ranges, buckets are based on the values of the returned documents. This means the response may include buckets outside of a query’s range. For example, if your query looks for values greater than 100, and you have a range covering 50 to 150, and an interval of 50, that document will land in 3 buckets - 50, 100, and 150. In general, it’s best to think of the query and aggregation steps as independent - the query selects a set of documents, and then the aggregation buckets those documents without regard to how they were selected. See [note on bucketing range fields](/reference/data-analysis/aggregations/search-aggregations-bucket-range-field-note.md) for more information and an example. + +$$$search-aggregations-bucket-histogram-aggregation-hard-bounds$$$ +The `hard_bounds` is a counterpart of `extended_bounds` and can limit the range of buckets in the histogram. It is particularly useful in the case of open [data ranges](/reference/elasticsearch/mapping-reference/range.md) that can result in a very large number of buckets. + +Example: + +$$$histogram-aggregation-hard-bounds-example$$$ + +```console +POST /sales/_search?size=0 +{ + "query": { + "constant_score": { "filter": { "range": { "price": { "lte": "500" } } } } + }, + "aggs": { + "prices": { + "histogram": { + "field": "price", + "interval": 50, + "hard_bounds": { + "min": 100, + "max": 200 + } + } + } + } +} +``` + +In this example even though the range specified in the query is up to 500, the histogram will only have 2 buckets starting at 100 and 150. All other buckets will be omitted even if documents that should go to this buckets are present in the results. + + +## Order [_order_2] + +By default the returned buckets are sorted by their `key` ascending, though the order behaviour can be controlled using the `order` setting. Supports the same `order` functionality as the [`Terms Aggregation`](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md#search-aggregations-bucket-terms-aggregation-order). + + +## Offset [_offset] + +By default the bucket keys start with 0 and then continue in even spaced steps of `interval`, e.g. if the interval is `10`, the first three buckets (assuming there is data inside them) will be `[0, 10)`, `[10, 20)`, `[20, 30)`. The bucket boundaries can be shifted by using the `offset` option. + +This can be best illustrated with an example. If there are 10 documents with values ranging from 5 to 14, using interval `10` will result in two buckets with 5 documents each. If an additional offset `5` is used, there will be only one single bucket `[5, 15)` containing all the 10 documents. + + +## Response Format [_response_format] + +By default, the buckets are returned as an ordered array. It is also possible to request the response as a hash instead keyed by the buckets keys: + +$$$histogram-aggregation-keyed-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "prices": { + "histogram": { + "field": "price", + "interval": 50, + "keyed": true + } + } + } +} +``` + +Response: + +```console-result +{ + ... 
+ "aggregations": { + "prices": { + "buckets": { + "0.0": { + "key": 0.0, + "doc_count": 1 + }, + "50.0": { + "key": 50.0, + "doc_count": 1 + }, + "100.0": { + "key": 100.0, + "doc_count": 0 + }, + "150.0": { + "key": 150.0, + "doc_count": 2 + }, + "200.0": { + "key": 200.0, + "doc_count": 3 + } + } + } + } +} +``` + + +## Missing value [_missing_value_2] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +$$$histogram-aggregation-missing-value-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "quantity": { + "histogram": { + "field": "quantity", + "interval": 10, + "missing": 0 <1> + } + } + } +} +``` + +1. Documents without a value in the `quantity` field will fall into the same bucket as documents that have the value `0`. + + + +## Histogram fields [search-aggregations-bucket-histogram-aggregation-histogram-fields] + +Running a histogram aggregation over histogram fields computes the total number of counts for each interval. + +For example, executing a histogram aggregation against the following index that stores pre-aggregated histograms with latency metrics (in milliseconds) for different networks: + +```console +PUT metrics_index +{ + "mappings": { + "properties": { + "network": { + "properties": { + "name": { + "type": "keyword" + } + } + }, + "latency_histo": { + "type": "histogram" + } + } + } +} + +PUT metrics_index/_doc/1?refresh +{ + "network.name" : "net-1", + "latency_histo" : { + "values" : [1, 3, 8, 12, 15], + "counts" : [3, 7, 23, 12, 6] + } +} + +PUT metrics_index/_doc/2?refresh +{ + "network.name" : "net-2", + "latency_histo" : { + "values" : [1, 6, 8, 12, 14], + "counts" : [8, 17, 8, 7, 6] + } +} + +POST /metrics_index/_search?size=0 +{ + "aggs": { + "latency_buckets": { + "histogram": { + "field": "latency_histo", + "interval": 5 + } + } + } +} +``` + +The `histogram` aggregation will sum the counts of each interval computed based on the `values` and return the following output: + +```console-result +{ + ... + "aggregations": { + "latency_buckets": { + "buckets": [ + { + "key": 0.0, + "doc_count": 18 + }, + { + "key": 5.0, + "doc_count": 48 + }, + { + "key": 10.0, + "doc_count": 25 + }, + { + "key": 15.0, + "doc_count": 6 + } + ] + } + } +} +``` + +::::{important} +Histogram aggregation is a bucket aggregation, which partitions documents into buckets rather than calculating metrics over fields like metrics aggregations do. Each bucket represents a collection of documents which sub-aggregations can run on. On the other hand, a histogram field is a pre-aggregated field representing multiple values inside a single field: buckets of numerical data and a count of items/documents for each bucket. This mismatch between the histogram aggregations expected input (expecting raw documents) and the histogram field (that provides summary information) limits the outcome of the aggregation to only the doc counts for each bucket. + +**Consequently, when executing a histogram aggregation over a histogram field, no sub-aggregations are allowed.** + +:::: + + +Also, when running histogram aggregation over histogram field the `missing` parameter is not supported. 
diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-ipprefix-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-ipprefix-aggregation.md new file mode 100644 index 0000000000000..f15f177f6d5e9 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-ipprefix-aggregation.md @@ -0,0 +1,366 @@ +--- +navigation_title: "IP prefix" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-ipprefix-aggregation.html +--- + +# IP prefix aggregation [search-aggregations-bucket-ipprefix-aggregation] + + +A bucket aggregation that groups documents based on the network or sub-network of an IP address. An IP address consists of two groups of bits: the most significant bits which represent the network prefix, and the least significant bits which represent the host. + +## Example [ipprefix-agg-ex] + +For example, consider the following index: + +```console +PUT network-traffic +{ + "mappings": { + "properties": { + "ipv4": { "type": "ip" }, + "ipv6": { "type": "ip" } + } + } +} + +POST /network-traffic/_bulk?refresh +{"index":{"_id":0}} +{"ipv4":"192.168.1.10","ipv6":"2001:db8:a4f8:112a:6001:0:12:7f10"} +{"index":{"_id":1}} +{"ipv4":"192.168.1.12","ipv6":"2001:db8:a4f8:112a:6001:0:12:7f12"} +{"index":{"_id":2}} +{ "ipv4":"192.168.1.33","ipv6":"2001:db8:a4f8:112a:6001:0:12:7f33"} +{"index":{"_id":3}} +{"ipv4":"192.168.1.10","ipv6":"2001:db8:a4f8:112a:6001:0:12:7f10"} +{"index":{"_id":4}} +{"ipv4":"192.168.2.41","ipv6":"2001:db8:a4f8:112c:6001:0:12:7f41"} +{"index":{"_id":5}} +{"ipv4":"192.168.2.10","ipv6":"2001:db8:a4f8:112c:6001:0:12:7f10"} +{"index":{"_id":6}} +{"ipv4":"192.168.2.23","ipv6":"2001:db8:a4f8:112c:6001:0:12:7f23"} +{"index":{"_id":7}} +{"ipv4":"192.168.3.201","ipv6":"2001:db8:a4f8:114f:6001:0:12:7201"} +{"index":{"_id":8}} +{"ipv4":"192.168.3.107","ipv6":"2001:db8:a4f8:114f:6001:0:12:7307"} +``` + +The following aggregation groups documents into buckets. Each bucket identifies a different sub-network. The sub-network is calculated by applying a netmask with prefix length of `24` to each IP address in the `ipv4` field: + +$$$ip-prefix-ipv4-example$$$ + +```console +GET /network-traffic/_search +{ + "size": 0, + "aggs": { + "ipv4-subnets": { + "ip_prefix": { + "field": "ipv4", + "prefix_length": 24 + } + } + } +} +``` + +Response: + +```console-result +{ + ... + + "aggregations": { + "ipv4-subnets": { + "buckets": [ + { + "key": "192.168.1.0", + "is_ipv6": false, + "doc_count": 4, + "prefix_length": 24, + "netmask": "255.255.255.0" + }, + { + "key": "192.168.2.0", + "is_ipv6": false, + "doc_count": 3, + "prefix_length": 24, + "netmask": "255.255.255.0" + }, + { + "key": "192.168.3.0", + "is_ipv6": false, + "doc_count": 2, + "prefix_length": 24, + "netmask": "255.255.255.0" + } + ] + } + } +} +``` + +To aggregate IPv6 addresses, set `is_ipv6` to `true`. + +$$$ip-prefix-ipv6-example$$$ + +```console +GET /network-traffic/_search +{ + "size": 0, + "aggs": { + "ipv6-subnets": { + "ip_prefix": { + "field": "ipv6", + "prefix_length": 64, + "is_ipv6": true + } + } + } +} +``` + +If `is_ipv6` is `true`, the response doesn’t include a `netmask` for each bucket. + +```console-result +{ + ... 
+ + "aggregations": { + "ipv6-subnets": { + "buckets": [ + { + "key": "2001:db8:a4f8:112a::", + "is_ipv6": true, + "doc_count": 4, + "prefix_length": 64 + }, + { + "key": "2001:db8:a4f8:112c::", + "is_ipv6": true, + "doc_count": 3, + "prefix_length": 64 + }, + { + "key": "2001:db8:a4f8:114f::", + "is_ipv6": true, + "doc_count": 2, + "prefix_length": 64 + } + ] + } + } +} +``` + + +## Parameters [ip-prefix-agg-params] + +`field` +: (Required, string) The document IP address field to aggregate on. The field mapping type must be [`ip`](/reference/elasticsearch/mapping-reference/ip.md). + +`prefix_length` +: (Required, integer) Length of the network prefix. For IPv4 addresses, the accepted range is `[0, 32]`. For IPv6 addresses, the accepted range is `[0, 128]`. + +`is_ipv6` +: (Optional, boolean) Defines whether the prefix applies to IPv6 addresses. Just specifying the `prefix_length` parameter is not enough to know if an IP prefix applies to IPv4 or IPv6 addresses. Defaults to `false`. + +`append_prefix_length` +: (Optional, boolean) Defines whether the prefix length is appended to IP address keys in the response. Defaults to `false`. + +`keyed` +: (Optional, boolean) Defines whether buckets are returned as a hash rather than an array in the response. Defaults to `false`. + +`min_doc_count` +: (Optional, integer) Defines the minimum number of documents for buckets to be included in the response. Defaults to `1`. + + +## Response body [ipprefix-agg-response] + +`key` +: (string) The IPv6 or IPv4 subnet. + +`prefix_length` +: (integer) The length of the prefix used to aggregate the bucket. + +`doc_count` +: (integer) Number of documents matching a specific IP prefix. + +`is_ipv6` +: (boolean) Defines whether the netmask is an IPv6 netmask. + +`netmask` +: (string) The IPv4 netmask. If `is_ipv6` is `true` in the request, this field is missing in the response. + + +## Keyed Response [ipprefix-agg-keyed-response] + +Set the `keyed` flag of `true` to associate an unique IP address key with each bucket and return sub-networks as a hash rather than an array. + +Example: + +$$$ip-prefix-keyed-example$$$ + +```console +GET /network-traffic/_search +{ + "size": 0, + "aggs": { + "ipv4-subnets": { + "ip_prefix": { + "field": "ipv4", + "prefix_length": 24, + "keyed": true + } + } + } +} +``` + +Response: + +```console-result +{ + ... + + "aggregations": { + "ipv4-subnets": { + "buckets": { + "192.168.1.0": { + "is_ipv6": false, + "doc_count": 4, + "prefix_length": 24, + "netmask": "255.255.255.0" + }, + "192.168.2.0": { + "is_ipv6": false, + "doc_count": 3, + "prefix_length": 24, + "netmask": "255.255.255.0" + }, + "192.168.3.0": { + "is_ipv6": false, + "doc_count": 2, + "prefix_length": 24, + "netmask": "255.255.255.0" + } + } + } + } +} +``` + + +## Append the prefix length to the IP address key [ipprefix-agg-append-prefix-length] + +Set the `append_prefix_length` flag to `true` to catenate IP address keys with the prefix length of the sub-network. + +Example: + +$$$ip-prefix-append-prefix-len-example$$$ + +```console +GET /network-traffic/_search +{ + "size": 0, + "aggs": { + "ipv4-subnets": { + "ip_prefix": { + "field": "ipv4", + "prefix_length": 24, + "append_prefix_length": true + } + } + } +} +``` + +Response: + +```console-result +{ + ... 
+ + "aggregations": { + "ipv4-subnets": { + "buckets": [ + { + "key": "192.168.1.0/24", + "is_ipv6": false, + "doc_count": 4, + "prefix_length": 24, + "netmask": "255.255.255.0" + }, + { + "key": "192.168.2.0/24", + "is_ipv6": false, + "doc_count": 3, + "prefix_length": 24, + "netmask": "255.255.255.0" + }, + { + "key": "192.168.3.0/24", + "is_ipv6": false, + "doc_count": 2, + "prefix_length": 24, + "netmask": "255.255.255.0" + } + ] + } + } +} +``` + + +## Minimum document count [ipprefix-agg-min-doc-count] + +Use the `min_doc_count` parameter to only return buckets with a minimum number of documents. + +$$$ip-prefix-min-doc-count-example$$$ + +```console +GET /network-traffic/_search +{ + "size": 0, + "aggs": { + "ipv4-subnets": { + "ip_prefix": { + "field": "ipv4", + "prefix_length": 24, + "min_doc_count": 3 + } + } + } +} +``` + +Response: + +```console-result +{ + ... + + "aggregations": { + "ipv4-subnets": { + "buckets": [ + { + "key": "192.168.1.0", + "is_ipv6": false, + "doc_count": 4, + "prefix_length": 24, + "netmask": "255.255.255.0" + }, + { + "key": "192.168.2.0", + "is_ipv6": false, + "doc_count": 3, + "prefix_length": 24, + "netmask": "255.255.255.0" + } + ] + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-iprange-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-iprange-aggregation.md new file mode 100644 index 0000000000000..b8bbd060c117a --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-iprange-aggregation.md @@ -0,0 +1,202 @@ +--- +navigation_title: "IP range" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-iprange-aggregation.html +--- + +# IP range aggregation [search-aggregations-bucket-iprange-aggregation] + + +Just like the dedicated [date](/reference/data-analysis/aggregations/search-aggregations-bucket-daterange-aggregation.md) range aggregation, there is also a dedicated range aggregation for IP typed fields: + +Example: + +$$$ip-range-example$$$ + +```console +GET /ip_addresses/_search +{ + "size": 10, + "aggs": { + "ip_ranges": { + "ip_range": { + "field": "ip", + "ranges": [ + { "to": "10.0.0.5" }, + { "from": "10.0.0.5" } + ] + } + } + } +} +``` + +Response: + +```console-result +{ + ... + + "aggregations": { + "ip_ranges": { + "buckets": [ + { + "key": "*-10.0.0.5", + "to": "10.0.0.5", + "doc_count": 10 + }, + { + "key": "10.0.0.5-*", + "from": "10.0.0.5", + "doc_count": 260 + } + ] + } + } +} +``` + +IP ranges can also be defined as CIDR masks: + +$$$ip-range-cidr-example$$$ + +```console +GET /ip_addresses/_search +{ + "size": 0, + "aggs": { + "ip_ranges": { + "ip_range": { + "field": "ip", + "ranges": [ + { "mask": "10.0.0.0/25" }, + { "mask": "10.0.0.127/25" } + ] + } + } + } +} +``` + +Response: + +```console-result +{ + ... 
+ + "aggregations": { + "ip_ranges": { + "buckets": [ + { + "key": "10.0.0.0/25", + "from": "10.0.0.0", + "to": "10.0.0.128", + "doc_count": 128 + }, + { + "key": "10.0.0.127/25", + "from": "10.0.0.0", + "to": "10.0.0.128", + "doc_count": 128 + } + ] + } + } +} +``` + +## Keyed Response [_keyed_response_3] + +Setting the `keyed` flag to `true` will associate a unique string key with each bucket and return the ranges as a hash rather than an array: + +$$$ip-range-keyed-example$$$ + +```console +GET /ip_addresses/_search +{ + "size": 0, + "aggs": { + "ip_ranges": { + "ip_range": { + "field": "ip", + "ranges": [ + { "to": "10.0.0.5" }, + { "from": "10.0.0.5" } + ], + "keyed": true + } + } + } +} +``` + +Response: + +```console-result +{ + ... + + "aggregations": { + "ip_ranges": { + "buckets": { + "*-10.0.0.5": { + "to": "10.0.0.5", + "doc_count": 10 + }, + "10.0.0.5-*": { + "from": "10.0.0.5", + "doc_count": 260 + } + } + } + } +} +``` + +It is also possible to customize the key for each range: + +$$$ip-range-keyed-customized-keys-example$$$ + +```console +GET /ip_addresses/_search +{ + "size": 0, + "aggs": { + "ip_ranges": { + "ip_range": { + "field": "ip", + "ranges": [ + { "key": "infinity", "to": "10.0.0.5" }, + { "key": "and-beyond", "from": "10.0.0.5" } + ], + "keyed": true + } + } + } +} +``` + +Response: + +```console-result +{ + ... + + "aggregations": { + "ip_ranges": { + "buckets": { + "infinity": { + "to": "10.0.0.5", + "doc_count": 10 + }, + "and-beyond": { + "from": "10.0.0.5", + "doc_count": 260 + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-missing-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-missing-aggregation.md new file mode 100644 index 0000000000000..b2e02b12f1ef7 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-missing-aggregation.md @@ -0,0 +1,41 @@ +--- +navigation_title: "Missing" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-missing-aggregation.html +--- + +# Missing aggregation [search-aggregations-bucket-missing-aggregation] + + +A field data based single bucket aggregation, that creates a bucket of all documents in the current document set context that are missing a field value (effectively, missing a field or having the configured NULL value set). This aggregator will often be used in conjunction with other field data bucket aggregators (such as ranges) to return information for all the documents that could not be placed in any of the other buckets due to missing field data values. + +Example: + +$$$missing-aggregation-example$$$ + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "products_without_a_price": { + "missing": { "field": "price" } + } + } +} +``` + +In the above example, we get the total number of products that do not have a price. + +Response: + +```console-result +{ + ... 
+ "aggregations": { + "products_without_a_price": { + "doc_count": 0 + } + } +} +``` + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-multi-terms-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-multi-terms-aggregation.md new file mode 100644 index 0000000000000..3b0e825186a66 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-multi-terms-aggregation.md @@ -0,0 +1,382 @@ +--- +navigation_title: "Multi Terms" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-multi-terms-aggregation.html +--- + +# Multi Terms aggregation [search-aggregations-bucket-multi-terms-aggregation] + + +A multi-bucket value source based aggregation where buckets are dynamically built - one per unique set of values. The multi terms aggregation is very similar to the [`terms aggregation`](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md#search-aggregations-bucket-terms-aggregation-order), however in most cases it will be slower than the terms aggregation and will consume more memory. Therefore, if the same set of fields is constantly used, it would be more efficient to index a combined key for this fields as a separate field and use the terms aggregation on this field. + +The multi_term aggregations are the most useful when you need to sort by a number of document or a metric aggregation on a composite key and get top N results. If sorting is not required and all values are expected to be retrieved using nested terms aggregation or [`composite aggregations`](/reference/data-analysis/aggregations/search-aggregations-bucket-composite-aggregation.md) will be a faster and more memory efficient solution. + +Example: + +$$$multi-terms-aggregation-example$$$ + +```console +GET /products/_search +{ + "aggs": { + "genres_and_products": { + "multi_terms": { + "terms": [{ + "field": "genre" <1> + }, { + "field": "product" + }] + } + } + } +} +``` + +1. `multi_terms` aggregation can work with the same field types as a [`terms aggregation`](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md#search-aggregations-bucket-terms-aggregation-order) and supports most of the terms aggregation parameters. + + +Response: + +```console-result +{ + ... + "aggregations" : { + "genres_and_products" : { + "doc_count_error_upper_bound" : 0, <1> + "sum_other_doc_count" : 0, <2> + "buckets" : [ <3> + { + "key" : [ <4> + "rock", + "Product A" + ], + "key_as_string" : "rock|Product A", + "doc_count" : 2 + }, + { + "key" : [ + "electronic", + "Product B" + ], + "key_as_string" : "electronic|Product B", + "doc_count" : 1 + }, + { + "key" : [ + "jazz", + "Product B" + ], + "key_as_string" : "jazz|Product B", + "doc_count" : 1 + }, + { + "key" : [ + "rock", + "Product B" + ], + "key_as_string" : "rock|Product B", + "doc_count" : 1 + } + ] + } + } +} +``` + +1. an upper bound of the error on the document counts for each term, see < +2. when there are lots of unique terms, Elasticsearch only returns the top terms; this number is the sum of the document counts for all buckets that are not part of the response +3. the list of the top buckets. +4. the keys are arrays of values ordered the same ways as expression in the `terms` parameter of the aggregation + + +By default, the `multi_terms` aggregation will return the buckets for the top ten terms ordered by the `doc_count`. 
One can change this default behaviour by setting the `size` parameter. + +## Aggregation Parameters [search-aggregations-bucket-multi-terms-aggregation-parameters] + +The following parameters are supported. See [`terms aggregation`](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md#search-aggregations-bucket-terms-aggregation-order) for more detailed explanation of these parameters. + +size +: Optional. Defines how many term buckets should be returned out of the overall terms list. Defaults to 10. + +shard_size +: Optional. The higher the requested `size` is, the more accurate the results will be, but also, the more expensive it will be to compute the final results. The default `shard_size` is `(size * 1.5 + 10)`. + +show_term_doc_count_error +: Optional. Calculates the doc count error on per term basis. Defaults to `false` + +order +: Optional. Specifies the order of the buckets. Defaults to the number of documents per bucket. The bucket terms value is used as a tiebreaker for buckets with the same document count. + +min_doc_count +: Optional. The minimal number of documents in a bucket for it to be returned. Defaults to 1. + +shard_min_doc_count +: Optional. The minimal number of documents in a bucket on each shard for it to be returned. Defaults to `min_doc_count`. + +collect_mode +: Optional. Specifies the strategy for data collection. The `depth_first` or `breadth_first` modes are supported. Defaults to `breadth_first`. + + +## Script [search-aggregations-bucket-multi-terms-aggregation-script] + +Generating the terms using a script: + +$$$multi-terms-aggregation-runtime-field-example$$$ + +```console +GET /products/_search +{ + "runtime_mappings": { + "genre.length": { + "type": "long", + "script": "emit(doc['genre'].value.length())" + } + }, + "aggs": { + "genres_and_products": { + "multi_terms": { + "terms": [ + { + "field": "genre.length" + }, + { + "field": "product" + } + ] + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations" : { + "genres_and_products" : { + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0, + "buckets" : [ + { + "key" : [ + 4, + "Product A" + ], + "key_as_string" : "4|Product A", + "doc_count" : 2 + }, + { + "key" : [ + 4, + "Product B" + ], + "key_as_string" : "4|Product B", + "doc_count" : 2 + }, + { + "key" : [ + 10, + "Product B" + ], + "key_as_string" : "10|Product B", + "doc_count" : 1 + } + ] + } + } +} +``` + + +## Missing value [_missing_value_3] + +The `missing` parameter defines how documents that are missing a value should be treated. By default if any of the key components are missing the entire document will be ignored but it is also possible to treat them as if they had a value by using the `missing` parameter. + +$$$multi-terms-aggregation-missing-example$$$ + +```console +GET /products/_search +{ + "aggs": { + "genres_and_products": { + "multi_terms": { + "terms": [ + { + "field": "genre" + }, + { + "field": "product", + "missing": "Product Z" + } + ] + } + } + } +} +``` + +Response: + +```console-result +{ + ... 
+ "aggregations" : { + "genres_and_products" : { + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0, + "buckets" : [ + { + "key" : [ + "rock", + "Product A" + ], + "key_as_string" : "rock|Product A", + "doc_count" : 2 + }, + { + "key" : [ + "electronic", + "Product B" + ], + "key_as_string" : "electronic|Product B", + "doc_count" : 1 + }, + { + "key" : [ + "electronic", + "Product Z" + ], + "key_as_string" : "electronic|Product Z", <1> + "doc_count" : 1 + }, + { + "key" : [ + "jazz", + "Product B" + ], + "key_as_string" : "jazz|Product B", + "doc_count" : 1 + }, + { + "key" : [ + "rock", + "Product B" + ], + "key_as_string" : "rock|Product B", + "doc_count" : 1 + } + ] + } + } +} +``` + +1. Documents without a value in the `product` field will fall into the same bucket as documents that have the value `Product Z`. + + + +## Mixing field types [_mixing_field_types] + +::::{warning} +When aggregating on multiple indices the type of the aggregated field may not be the same in all indices. Some types are compatible with each other (`integer` and `long` or `float` and `double`) but when the types are a mix of decimal and non-decimal number the terms aggregation will promote the non-decimal numbers to decimal numbers. This can result in a loss of precision in the bucket values. +:::: + + + +## Sub aggregation and sorting examples [_sub_aggregation_and_sorting_examples] + +As most bucket aggregations the `multi_term` supports sub aggregations and ordering the buckets by metrics sub-aggregation: + +$$$multi-terms-aggregation-subaggregation-example$$$ + +```console +GET /products/_search +{ + "aggs": { + "genres_and_products": { + "multi_terms": { + "terms": [ + { + "field": "genre" + }, + { + "field": "product" + } + ], + "order": { + "total_quantity": "desc" + } + }, + "aggs": { + "total_quantity": { + "sum": { + "field": "quantity" + } + } + } + } + } +} +``` + +```console-result +{ + ... + "aggregations" : { + "genres_and_products" : { + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0, + "buckets" : [ + { + "key" : [ + "jazz", + "Product B" + ], + "key_as_string" : "jazz|Product B", + "doc_count" : 1, + "total_quantity" : { + "value" : 10.0 + } + }, + { + "key" : [ + "rock", + "Product A" + ], + "key_as_string" : "rock|Product A", + "doc_count" : 2, + "total_quantity" : { + "value" : 9.0 + } + }, + { + "key" : [ + "electronic", + "Product B" + ], + "key_as_string" : "electronic|Product B", + "doc_count" : 1, + "total_quantity" : { + "value" : 3.0 + } + }, + { + "key" : [ + "rock", + "Product B" + ], + "key_as_string" : "rock|Product B", + "doc_count" : 1, + "total_quantity" : { + "value" : 1.0 + } + } + ] + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-nested-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-nested-aggregation.md new file mode 100644 index 0000000000000..c8407fd53e2e3 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-nested-aggregation.md @@ -0,0 +1,167 @@ +--- +navigation_title: "Nested" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-nested-aggregation.html +--- + +# Nested aggregation [search-aggregations-bucket-nested-aggregation] + + +A special single bucket aggregation that enables aggregating nested documents. + +For example, lets say we have an index of products, and each product holds the list of resellers - each having its own price for the product. 
The mapping could look like: + +$$$nested-aggregation-example$$$ + +```console +PUT /products +{ + "mappings": { + "properties": { + "resellers": { <1> + "type": "nested", + "properties": { + "reseller": { + "type": "keyword" + }, + "price": { + "type": "double" + } + } + } + } + } +} +``` + +1. `resellers` is an array that holds nested documents. + + +The following request adds a product with two resellers: + +```console +PUT /products/_doc/0?refresh +{ + "name": "LED TV", <1> + "resellers": [ + { + "reseller": "companyA", + "price": 350 + }, + { + "reseller": "companyB", + "price": 500 + } + ] +} +``` + +1. We are using a dynamic mapping for the `name` attribute. + + +The following request returns the minimum price a product can be purchased for: + +```console +GET /products/_search?size=0 +{ + "query": { + "match": { + "name": "led tv" + } + }, + "aggs": { + "resellers": { + "nested": { + "path": "resellers" + }, + "aggs": { + "min_price": { + "min": { + "field": "resellers.price" + } + } + } + } + } +} +``` + +As you can see above, the nested aggregation requires the `path` of the nested documents within the top level documents. Then one can define any type of aggregation over these nested documents. + +Response: + +```console-result +{ + ... + "aggregations": { + "resellers": { + "doc_count": 2, + "min_price": { + "value": 350.0 + } + } + } +} +``` + +You can use a [`filter`](/reference/data-analysis/aggregations/search-aggregations-bucket-filter-aggregation.md) sub-aggregation to return results for a specific reseller. + +```console +GET /products/_search?size=0 +{ + "query": { + "match": { + "name": "led tv" + } + }, + "aggs": { + "resellers": { + "nested": { + "path": "resellers" + }, + "aggs": { + "filter_reseller": { + "filter": { + "bool": { + "filter": [ + { + "term": { + "resellers.reseller": "companyB" + } + } + ] + } + }, + "aggs": { + "min_price": { + "min": { + "field": "resellers.price" + } + } + } + } + } + } + } +} +``` + +The search returns: + +```console-result +{ + ... + "aggregations": { + "resellers": { + "doc_count": 2, + "filter_reseller": { + "doc_count": 1, + "min_price": { + "value": 500.0 + } + } + } + } +} +``` + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-parent-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-parent-aggregation.md new file mode 100644 index 0000000000000..2186ef734b9c6 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-parent-aggregation.md @@ -0,0 +1,210 @@ +--- +navigation_title: "Parent" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-parent-aggregation.html +--- + +# Parent aggregation [search-aggregations-bucket-parent-aggregation] + + +A special single bucket aggregation that selects parent documents that have the specified type, as defined in a [`join` field](/reference/elasticsearch/mapping-reference/parent-join.md). + +This aggregation has a single option: + +* `type` - The child type that should be selected. + +For example, let’s say we have an index of questions and answers. The answer type has the following `join` field in the mapping: + +$$$parent-aggregation-example$$$ + +```console +PUT parent_example +{ + "mappings": { + "properties": { + "join": { + "type": "join", + "relations": { + "question": "answer" + } + } + } + } +} +``` + +The `question` document contain a tag field and the `answer` documents contain an owner field. 
With the `parent` aggregation the owner buckets can be mapped to the tag buckets in a single request even though the two fields exist in two different kinds of documents. + +An example of a question document: + +```console +PUT parent_example/_doc/1 +{ + "join": { + "name": "question" + }, + "body": "

I have Windows 2003 server and i bought a new Windows 2008 server...", + "title": "Whats the best way to file transfer my site from server to a newer one?", + "tags": [ + "windows-server-2003", + "windows-server-2008", + "file-transfer" + ] +} +``` + +Examples of `answer` documents: + +```console +PUT parent_example/_doc/2?routing=1 +{ + "join": { + "name": "answer", + "parent": "1" + }, + "owner": { + "location": "Norfolk, United Kingdom", + "display_name": "Sam", + "id": 48 + }, + "body": "

Unfortunately you're pretty much limited to FTP...", + "creation_date": "2009-05-04T13:45:37.030" +} + +PUT parent_example/_doc/3?routing=1&refresh +{ + "join": { + "name": "answer", + "parent": "1" + }, + "owner": { + "location": "Norfolk, United Kingdom", + "display_name": "Troll", + "id": 49 + }, + "body": "

Use Linux...", + "creation_date": "2009-05-05T13:45:37.030" +} +``` + +The following request can be built that connects the two together: + +```console +POST parent_example/_search?size=0 +{ + "aggs": { + "top-names": { + "terms": { + "field": "owner.display_name.keyword", + "size": 10 + }, + "aggs": { + "to-questions": { + "parent": { + "type" : "answer" <1> + }, + "aggs": { + "top-tags": { + "terms": { + "field": "tags.keyword", + "size": 10 + } + } + } + } + } + } + } +} +``` + +1. The `type` points to type / mapping with the name `answer`. + + +The above example returns the top answer owners and per owner the top question tags. + +Possible response: + +```console-result +{ + "took": 9, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total" : { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "top-names": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "Sam", + "doc_count": 1, <1> + "to-questions": { + "doc_count": 1, <2> + "top-tags": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "file-transfer", + "doc_count": 1 + }, + { + "key": "windows-server-2003", + "doc_count": 1 + }, + { + "key": "windows-server-2008", + "doc_count": 1 + } + ] + } + } + }, + { + "key": "Troll", + "doc_count": 1, + "to-questions": { + "doc_count": 1, + "top-tags": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "file-transfer", + "doc_count": 1 + }, + { + "key": "windows-server-2003", + "doc_count": 1 + }, + { + "key": "windows-server-2008", + "doc_count": 1 + } + ] + } + } + } + ] + } + } +} +``` + +1. The number of answer documents with the tag `Sam`, `Troll`, etc. +2. The number of question documents that are related to answer documents with the tag `Sam`, `Troll`, etc. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-range-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-range-aggregation.md new file mode 100644 index 0000000000000..c28747d411b89 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-range-aggregation.md @@ -0,0 +1,389 @@ +--- +navigation_title: "Range" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-range-aggregation.html +--- + +# Range aggregation [search-aggregations-bucket-range-aggregation] + + +A multi-bucket value source based aggregation that enables the user to define a set of ranges - each representing a bucket. During the aggregation process, the values extracted from each document will be checked against each bucket range and "bucket" the relevant/matching document. Note that this aggregation includes the `from` value and excludes the `to` value for each range. + +Example: + +$$$range-aggregation-example$$$ + +```console +GET sales/_search +{ + "aggs": { + "price_ranges": { + "range": { + "field": "price", + "ranges": [ + { "to": 100.0 }, + { "from": 100.0, "to": 200.0 }, + { "from": 200.0 } + ] + } + } + } +} +``` + +Response: + +```console-result +{ + ... 
+ "aggregations": { + "price_ranges": { + "buckets": [ + { + "key": "*-100.0", + "to": 100.0, + "doc_count": 2 + }, + { + "key": "100.0-200.0", + "from": 100.0, + "to": 200.0, + "doc_count": 2 + }, + { + "key": "200.0-*", + "from": 200.0, + "doc_count": 3 + } + ] + } + } +} +``` + +## Keyed Response [_keyed_response_4] + +Setting the `keyed` flag to `true` will associate a unique string key with each bucket and return the ranges as a hash rather than an array: + +$$$range-aggregation-keyed-example$$$ + +```console +GET sales/_search +{ + "aggs": { + "price_ranges": { + "range": { + "field": "price", + "keyed": true, + "ranges": [ + { "to": 100 }, + { "from": 100, "to": 200 }, + { "from": 200 } + ] + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "price_ranges": { + "buckets": { + "*-100.0": { + "to": 100.0, + "doc_count": 2 + }, + "100.0-200.0": { + "from": 100.0, + "to": 200.0, + "doc_count": 2 + }, + "200.0-*": { + "from": 200.0, + "doc_count": 3 + } + } + } + } +} +``` + +It is also possible to customize the key for each range: + +$$$range-aggregation-custom-keys-example$$$ + +```console +GET sales/_search +{ + "aggs": { + "price_ranges": { + "range": { + "field": "price", + "keyed": true, + "ranges": [ + { "key": "cheap", "to": 100 }, + { "key": "average", "from": 100, "to": 200 }, + { "key": "expensive", "from": 200 } + ] + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "price_ranges": { + "buckets": { + "cheap": { + "to": 100.0, + "doc_count": 2 + }, + "average": { + "from": 100.0, + "to": 200.0, + "doc_count": 2 + }, + "expensive": { + "from": 200.0, + "doc_count": 3 + } + } + } + } +} +``` + + +## Script [_script] + +If the data in your documents doesn’t exactly match what you’d like to aggregate, use a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). For example, if you need to apply a particular currency conversion rate: + +$$$range-aggregation-runtime-field-example$$$ + +```console +GET sales/_search +{ + "runtime_mappings": { + "price.euros": { + "type": "double", + "script": { + "source": """ + emit(doc['price'].value * params.conversion_rate) + """, + "params": { + "conversion_rate": 0.835526591 + } + } + } + }, + "aggs": { + "price_ranges": { + "range": { + "field": "price.euros", + "ranges": [ + { "to": 100 }, + { "from": 100, "to": 200 }, + { "from": 200 } + ] + } + } + } +} +``` + + +## Sub Aggregations [_sub_aggregations_2] + +The following example, not only "bucket" the documents to the different buckets but also computes statistics over the prices in each price range + +$$$range-aggregation-sub-aggregation-example$$$ + +```console +GET sales/_search +{ + "aggs": { + "price_ranges": { + "range": { + "field": "price", + "ranges": [ + { "to": 100 }, + { "from": 100, "to": 200 }, + { "from": 200 } + ] + }, + "aggs": { + "price_stats": { + "stats": { "field": "price" } + } + } + } + } +} +``` + +Response: + +```console-result +{ + ... 
+ "aggregations": { + "price_ranges": { + "buckets": [ + { + "key": "*-100.0", + "to": 100.0, + "doc_count": 2, + "price_stats": { + "count": 2, + "min": 10.0, + "max": 50.0, + "avg": 30.0, + "sum": 60.0 + } + }, + { + "key": "100.0-200.0", + "from": 100.0, + "to": 200.0, + "doc_count": 2, + "price_stats": { + "count": 2, + "min": 150.0, + "max": 175.0, + "avg": 162.5, + "sum": 325.0 + } + }, + { + "key": "200.0-*", + "from": 200.0, + "doc_count": 3, + "price_stats": { + "count": 3, + "min": 200.0, + "max": 200.0, + "avg": 200.0, + "sum": 600.0 + } + } + ] + } + } +} +``` + + +## Histogram fields [search-aggregations-bucket-range-aggregation-histogram-fields] + +Running a range aggregation over histogram fields computes the total number of counts for each configured range. + +This is done without interpolating between the histogram field values. Consequently, it is possible to have a range that is "in-between" two histogram values. The resulting range bucket would have a zero doc count. + +Here is an example, executing a range aggregation against the following index that stores pre-aggregated histograms with latency metrics (in milliseconds) for different networks: + +```console +PUT metrics_index +{ + "mappings": { + "properties": { + "network": { + "properties": { + "name": { + "type": "keyword" + } + } + }, + "latency_histo": { + "type": "histogram" + } + } + } +} + +PUT metrics_index/_doc/1?refresh +{ + "network.name" : "net-1", + "latency_histo" : { + "values" : [1, 3, 8, 12, 15], + "counts" : [3, 7, 23, 12, 6] + } +} + +PUT metrics_index/_doc/2?refresh +{ + "network.name" : "net-2", + "latency_histo" : { + "values" : [1, 6, 8, 12, 14], + "counts" : [8, 17, 8, 7, 6] + } +} + +GET metrics_index/_search?size=0&filter_path=aggregations +{ + "aggs": { + "latency_ranges": { + "range": { + "field": "latency_histo", + "ranges": [ + {"to": 2}, + {"from": 2, "to": 3}, + {"from": 3, "to": 10}, + {"from": 10} + ] + } + } + } +} +``` + +The `range` aggregation will sum the counts of each range computed based on the `values` and return the following output: + +```console-result +{ + "aggregations": { + "latency_ranges": { + "buckets": [ + { + "key": "*-2.0", + "to": 2.0, + "doc_count": 11 + }, + { + "key": "2.0-3.0", + "from": 2.0, + "to": 3.0, + "doc_count": 0 + }, + { + "key": "3.0-10.0", + "from": 3.0, + "to": 10.0, + "doc_count": 55 + }, + { + "key": "10.0-*", + "from": 10.0, + "doc_count": 31 + } + ] + } + } +} +``` + +::::{important} +Range aggregation is a bucket aggregation, which partitions documents into buckets rather than calculating metrics over fields like metrics aggregations do. Each bucket represents a collection of documents which sub-aggregations can run on. On the other hand, a histogram field is a pre-aggregated field representing multiple values inside a single field: buckets of numerical data and a count of items/documents for each bucket. This mismatch between the range aggregations expected input (expecting raw documents) and the histogram field (that provides summary information) limits the outcome of the aggregation to only the doc counts for each bucket. 
+ +**Consequently, when executing a range aggregation over a histogram field, no sub-aggregations are allowed.** + +:::: diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-range-field-note.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-range-field-note.md new file mode 100644 index 0000000000000..9541b77c55ed7 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-range-field-note.md @@ -0,0 +1,176 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-range-field-note.html +--- + +# Subtleties of bucketing range fields [search-aggregations-bucket-range-field-note] + +## Documents are counted for each bucket they land in [_documents_are_counted_for_each_bucket_they_land_in] + +Since a range represents multiple values, running a bucket aggregation over a range field can result in the same document landing in multiple buckets. This can lead to surprising behavior, such as the sum of bucket counts being higher than the number of matched documents. For example, consider the following index: + +```console +PUT range_index +{ + "settings": { + "number_of_shards": 2 + }, + "mappings": { + "properties": { + "expected_attendees": { + "type": "integer_range" + }, + "time_frame": { + "type": "date_range", + "format": "yyyy-MM-dd||epoch_millis" + } + } + } +} + +PUT range_index/_doc/1?refresh +{ + "expected_attendees" : { + "gte" : 10, + "lte" : 20 + }, + "time_frame" : { + "gte" : "2019-10-28", + "lte" : "2019-11-04" + } +} +``` + +The range is wider than the interval in the following aggregation, and thus the document will land in multiple buckets. + +$$$range-field-aggregation-example$$$ + +```console +POST /range_index/_search?size=0 +{ + "aggs": { + "range_histo": { + "histogram": { + "field": "expected_attendees", + "interval": 5 + } + } + } +} +``` + +Since the interval is `5` (and the offset is `0` by default), we expect buckets `10`, `15`, and `20`. Our range document will fall in all three of these buckets. + +```console-result +{ + ... + "aggregations" : { + "range_histo" : { + "buckets" : [ + { + "key" : 10.0, + "doc_count" : 1 + }, + { + "key" : 15.0, + "doc_count" : 1 + }, + { + "key" : 20.0, + "doc_count" : 1 + } + ] + } + } +} +``` + +A document cannot exist partially in a bucket; For example, the above document cannot count as one-third in each of the above three buckets. In this example, since the document’s range landed in multiple buckets, the full value of that document would also be counted in any sub-aggregations for each bucket as well. + + +## Query bounds are not aggregation filters [_query_bounds_are_not_aggregation_filters] + +Another unexpected behavior can arise when a query is used to filter on the field being aggregated. In this case, a document could match the query but still have one or both of the endpoints of the range outside the query. 
Consider the following aggregation on the above document: + +$$$range-field-aggregation-query-bounds-example$$$ + +```console +POST /range_index/_search?size=0 +{ + "query": { + "range": { + "time_frame": { + "gte": "2019-11-01", + "format": "yyyy-MM-dd" + } + } + }, + "aggs": { + "november_data": { + "date_histogram": { + "field": "time_frame", + "calendar_interval": "day", + "format": "yyyy-MM-dd" + } + } + } +} +``` + +Even though the query only considers days in November, the aggregation generates 8 buckets (4 in October, 4 in November) because the aggregation is calculated over the ranges of all matching documents. + +```console-result +{ + ... + "aggregations" : { + "november_data" : { + "buckets" : [ + { + "key_as_string" : "2019-10-28", + "key" : 1572220800000, + "doc_count" : 1 + }, + { + "key_as_string" : "2019-10-29", + "key" : 1572307200000, + "doc_count" : 1 + }, + { + "key_as_string" : "2019-10-30", + "key" : 1572393600000, + "doc_count" : 1 + }, + { + "key_as_string" : "2019-10-31", + "key" : 1572480000000, + "doc_count" : 1 + }, + { + "key_as_string" : "2019-11-01", + "key" : 1572566400000, + "doc_count" : 1 + }, + { + "key_as_string" : "2019-11-02", + "key" : 1572652800000, + "doc_count" : 1 + }, + { + "key_as_string" : "2019-11-03", + "key" : 1572739200000, + "doc_count" : 1 + }, + { + "key_as_string" : "2019-11-04", + "key" : 1572825600000, + "doc_count" : 1 + } + ] + } + } +} +``` + +Depending on the use case, a `CONTAINS` query could limit the documents to only those that fall entirely in the queried range. In this example, the one document would not be included and the aggregation would be empty. Filtering the buckets after the aggregation is also an option, for use cases where the document should be counted but the out of bounds data can be safely ignored. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-rare-terms-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-rare-terms-aggregation.md new file mode 100644 index 0000000000000..480747123dcc2 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-rare-terms-aggregation.md @@ -0,0 +1,255 @@ +--- +navigation_title: "Rare terms" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-rare-terms-aggregation.html +--- + +# Rare terms aggregation [search-aggregations-bucket-rare-terms-aggregation] + + +A multi-bucket value source based aggregation which finds "rare" terms — terms that are at the long-tail of the distribution and are not frequent. Conceptually, this is like a `terms` aggregation that is sorted by `_count` ascending. As noted in the [terms aggregation docs](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md#search-aggregations-bucket-terms-aggregation-order), actually ordering a `terms` agg by count ascending has unbounded error. Instead, you should use the `rare_terms` aggregation + +## Syntax [_syntax_3] + +A `rare_terms` aggregation looks like this in isolation: + +```js +{ + "rare_terms": { + "field": "the_field", + "max_doc_count": 1 + } +} +``` + +| | | | | +| --- | --- | --- | --- | +| Parameter Name | Description | Required | Default Value | +| `field` | The field we wish to find rare terms in | Required | | +| `max_doc_count` | The maximum number of documents a term should appear in. | Optional | `1` | +| `precision` | The precision of the internal CuckooFilters. 
Smaller precision leads tobetter approximation, but higher memory usage. Cannot be smaller than `0.00001` | Optional | `0.001` | +| `include` | Terms that should be included in the aggregation | Optional | | +| `exclude` | Terms that should be excluded from the aggregation | Optional | | +| `missing` | The value that should be used if a document does not have the field being aggregated | Optional | | + +Example: + +$$$rare-terms-aggregation-example$$$ + +```console +GET /_search +{ + "aggs": { + "genres": { + "rare_terms": { + "field": "genre" + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "genres": { + "buckets": [ + { + "key": "swing", + "doc_count": 1 + } + ] + } + } +} +``` + +In this example, the only bucket that we see is the "swing" bucket, because it is the only term that appears in one document. If we increase the `max_doc_count` to `2`, we’ll see some more buckets: + +$$$rare-terms-aggregation-max-doc-count-example$$$ + +```console +GET /_search +{ + "aggs": { + "genres": { + "rare_terms": { + "field": "genre", + "max_doc_count": 2 + } + } + } +} +``` + +This now shows the "jazz" term which has a `doc_count` of 2": + +```console-result +{ + ... + "aggregations": { + "genres": { + "buckets": [ + { + "key": "swing", + "doc_count": 1 + }, + { + "key": "jazz", + "doc_count": 2 + } + ] + } + } +} +``` + + +## Maximum document count [search-aggregations-bucket-rare-terms-aggregation-max-doc-count] + +The `max_doc_count` parameter is used to control the upper bound of document counts that a term can have. There is not a size limitation on the `rare_terms` agg like `terms` agg has. This means that terms which match the `max_doc_count` criteria will be returned. The aggregation functions in this manner to avoid the order-by-ascending issues that afflict the `terms` aggregation. + +This does, however, mean that a large number of results can be returned if chosen incorrectly. To limit the danger of this setting, the maximum `max_doc_count` is 100. + + +## Max Bucket Limit [search-aggregations-bucket-rare-terms-aggregation-max-buckets] + +The Rare Terms aggregation is more liable to trip the `search.max_buckets` soft limit than other aggregations due to how it works. The `max_bucket` soft-limit is evaluated on a per-shard basis while the aggregation is collecting results. It is possible for a term to be "rare" on a shard but become "not rare" once all the shard results are merged together. This means that individual shards tend to collect more buckets than are truly rare, because they only have their own local view. This list is ultimately pruned to the correct, smaller list of rare terms on the coordinating node…​ but a shard may have already tripped the `max_buckets` soft limit and aborted the request. + +When aggregating on fields that have potentially many "rare" terms, you may need to increase the `max_buckets` soft limit. Alternatively, you might need to find a way to filter the results to return fewer rare values (smaller time span, filter by category, etc), or re-evaluate your definition of "rare" (e.g. if something appears 100,000 times, is it truly "rare"?) + + +## Document counts are approximate [search-aggregations-bucket-rare-terms-aggregation-approximate-counts] + +The naive way to determine the "rare" terms in a dataset is to place all the values in a map, incrementing counts as each document is visited, then return the bottom `n` rows. This does not scale beyond even modestly sized data sets. 
A sharded approach where only the "top n" values are retained from each shard (ala the `terms` aggregation) fails because the long-tail nature of the problem means it is impossible to find the "top n" bottom values without simply collecting all the values from all shards. + +Instead, the Rare Terms aggregation uses a different approximate algorithm: + +1. Values are placed in a map the first time they are seen. +2. Each addition occurrence of the term increments a counter in the map +3. If the counter > the `max_doc_count` threshold, the term is removed from the map and placed in a [CuckooFilter](https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) +4. The CuckooFilter is consulted on each term. If the value is inside the filter, it is known to be above the threshold already and skipped. + +After execution, the map of values is the map of "rare" terms under the `max_doc_count` threshold. This map and CuckooFilter are then merged with all other shards. If there are terms that are greater than the threshold (or appear in a different shard’s CuckooFilter) the term is removed from the merged list. The final map of values is returned to the user as the "rare" terms. + +CuckooFilters have the possibility of returning false positives (they can say a value exists in their collection when it actually does not). Since the CuckooFilter is being used to see if a term is over threshold, this means a false positive from the CuckooFilter will mistakenly say a value is common when it is not (and thus exclude it from it final list of buckets). Practically, this means the aggregations exhibits false-negative behavior since the filter is being used "in reverse" of how people generally think of approximate set membership sketches. + +CuckooFilters are described in more detail in the paper: + +[Fan, Bin, et al. "Cuckoo filter: Practically better than bloom."](https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) Proceedings of the 10th ACM International on Conference on emerging Networking Experiments and Technologies. ACM, 2014. + + +## Precision [_precision] + +Although the internal CuckooFilter is approximate in nature, the false-negative rate can be controlled with a `precision` parameter. This allows the user to trade more runtime memory for more accurate results. + +The default precision is `0.001`, and the smallest (e.g. most accurate and largest memory overhead) is `0.00001`. Below are some charts which demonstrate how the accuracy of the aggregation is affected by precision and number of distinct terms. + +The X-axis shows the number of distinct values the aggregation has seen, and the Y-axis shows the percent error. Each line series represents one "rarity" condition (ranging from one rare item to 100,000 rare items). For example, the orange "10" line means ten of the values were "rare" (`doc_count == 1`), out of 1-20m distinct values (where the rest of the values had `doc_count > 1`) + +This first chart shows precision `0.01`: + +![accuracy 01](../../../images/accuracy_01.png "") + +And precision `0.001` (the default): + +![accuracy 001](../../../images/accuracy_001.png "") + +And finally `precision 0.0001`: + +![accuracy 0001](../../../images/accuracy_0001.png "") + +The default precision of `0.001` maintains an accuracy of < 2.5% for the tested conditions, and accuracy slowly degrades in a controlled, linear fashion as the number of distinct values increases. 
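+
+If the default trade-off does not give accurate enough results for a particular field, the `precision` parameter can be lowered on the aggregation itself. A minimal sketch, reusing the `genre` field from the earlier examples (the value shown is purely illustrative):
+
+```console
+GET /_search
+{
+  "aggs": {
+    "genres": {
+      "rare_terms": {
+        "field": "genre",
+        "max_doc_count": 1,
+        "precision": 0.0001
+      }
+    }
+  }
+}
+```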
+ +The default precision of `0.001` has a memory profile of `1.748⁻⁶ * n` bytes, where `n` is the number of distinct values the aggregation has seen (it can also be roughly eyeballed, e.g. 20 million unique values is about 30mb of memory). The memory usage is linear to the number of distinct values regardless of which precision is chosen, the precision only affects the slope of the memory profile as seen in this chart: + +![memory](../../../images/memory.png "") + +For comparison, an equivalent terms aggregation at 20 million buckets would be roughly `20m * 69b == ~1.38gb` (with 69 bytes being a very optimistic estimate of an empty bucket cost, far lower than what the circuit breaker accounts for). So although the `rare_terms` agg is relatively heavy, it is still orders of magnitude smaller than the equivalent terms aggregation + + +## Filtering Values [_filtering_values] + +It is possible to filter the values for which buckets will be created. This can be done using the `include` and `exclude` parameters which are based on regular expression strings or arrays of exact values. Additionally, `include` clauses can filter using `partition` expressions. + +### Filtering Values with regular expressions [_filtering_values_with_regular_expressions] + +$$$rare-terms-aggregation-regex-example$$$ + +```console +GET /_search +{ + "aggs": { + "genres": { + "rare_terms": { + "field": "genre", + "include": "swi*", + "exclude": "electro*" + } + } + } +} +``` + +In the above example, buckets will be created for all the tags that starts with `swi`, except those starting with `electro` (so the tag `swing` will be aggregated but not `electro_swing`). The `include` regular expression will determine what values are "allowed" to be aggregated, while the `exclude` determines the values that should not be aggregated. When both are defined, the `exclude` has precedence, meaning, the `include` is evaluated first and only then the `exclude`. + +The syntax is the same as [regexp queries](/reference/query-languages/regexp-syntax.md). + + +### Filtering Values with exact values [_filtering_values_with_exact_values] + +For matching based on exact values the `include` and `exclude` parameters can simply take an array of strings that represent the terms as they are found in the index: + +$$$rare-terms-aggregation-exact-value-example$$$ + +```console +GET /_search +{ + "aggs": { + "genres": { + "rare_terms": { + "field": "genre", + "include": [ "swing", "rock" ], + "exclude": [ "jazz" ] + } + } + } +} +``` + + + +## Missing value [_missing_value_4] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +$$$rare-terms-aggregation-missing-example$$$ + +```console +GET /_search +{ + "aggs": { + "genres": { + "rare_terms": { + "field": "genre", + "missing": "N/A" <1> + } + } + } +} +``` + +1. Documents without a value in the `tags` field will fall into the same bucket as documents that have the value `N/A`. + + + +## Nested, RareTerms, and scoring sub-aggregations [_nested_rareterms_and_scoring_sub_aggregations] + +The RareTerms aggregation has to operate in `breadth_first` mode, since it needs to prune terms as doc count thresholds are breached. This requirement means the RareTerms aggregation is incompatible with certain combinations of aggregations that require `depth_first`. 
In particular, scoring sub-aggregations that are inside a `nested` force the entire aggregation tree to run in `depth_first` mode. This will throw an exception since RareTerms is unable to process `depth_first`. + +As a concrete example, if `rare_terms` aggregation is the child of a `nested` aggregation, and one of the child aggregations of `rare_terms` needs document scores (like a `top_hits` aggregation), this will throw an exception. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-reverse-nested-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-reverse-nested-aggregation.md new file mode 100644 index 0000000000000..5282f0d491723 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-reverse-nested-aggregation.md @@ -0,0 +1,119 @@ +--- +navigation_title: "Reverse nested" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-reverse-nested-aggregation.html +--- + +# Reverse nested aggregation [search-aggregations-bucket-reverse-nested-aggregation] + + +A special single bucket aggregation that enables aggregating on parent docs from nested documents. Effectively this aggregation can break out of the nested block structure and link to other nested structures or the root document, which allows nesting other aggregations that aren’t part of the nested object in a nested aggregation. + +The `reverse_nested` aggregation must be defined inside a `nested` aggregation. + +* `path` - Which defines to what nested object field should be joined back. The default is empty, which means that it joins back to the root / main document level. The path cannot contain a reference to a nested object field that falls outside the `nested` aggregation’s nested structure a `reverse_nested` is in. + +For example, lets say we have an index for a ticket system with issues and comments. The comments are inlined into the issue documents as nested documents. The mapping could look like: + +$$$reversed-nested-aggregation-example$$$ + +```console +PUT /issues +{ + "mappings": { + "properties": { + "tags": { "type": "keyword" }, + "comments": { <1> + "type": "nested", + "properties": { + "username": { "type": "keyword" }, + "comment": { "type": "text" } + } + } + } + } +} +``` + +1. The `comments` is an array that holds nested documents under the `issue` object. + + +The following aggregations will return the top commenters' username that have commented and per top commenter the top tags of the issues the user has commented on: + +```console +GET /issues/_search +{ + "query": { + "match_all": {} + }, + "aggs": { + "comments": { + "nested": { + "path": "comments" + }, + "aggs": { + "top_usernames": { + "terms": { + "field": "comments.username" + }, + "aggs": { + "comment_to_issue": { + "reverse_nested": {}, <1> + "aggs": { + "top_tags_per_comment": { + "terms": { + "field": "tags" + } + } + } + } + } + } + } + } + } +} +``` + +As you can see above, the `reverse_nested` aggregation is put in to a `nested` aggregation as this is the only place in the dsl where the `reverse_nested` aggregation can be used. Its sole purpose is to join back to a parent doc higher up in the nested structure. + +1. A `reverse_nested` aggregation that joins back to the root / main document level, because no `path` has been defined. 
Via the `path` option the `reverse_nested` aggregation can join back to a different level, if multiple layered nested object types have been defined in the mapping + + +Possible response snippet: + +```console-result +{ + "aggregations": { + "comments": { + "doc_count": 1, + "top_usernames": { + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0, + "buckets": [ + { + "key": "username_1", + "doc_count": 1, + "comment_to_issue": { + "doc_count": 1, + "top_tags_per_comment": { + "doc_count_error_upper_bound" : 0, + "sum_other_doc_count" : 0, + "buckets": [ + { + "key": "tag_1", + "doc_count": 1 + } + ... + ] + } + } + } + ... + ] + } + } + } +} +``` + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-sampler-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-sampler-aggregation.md new file mode 100644 index 0000000000000..4fa9ef8b27916 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-sampler-aggregation.md @@ -0,0 +1,151 @@ +--- +navigation_title: "Sampler" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-sampler-aggregation.html +--- + +# Sampler aggregation [search-aggregations-bucket-sampler-aggregation] + + +A filtering aggregation used to limit any sub aggregations' processing to a sample of the top-scoring documents. + +* Tightening the focus of analytics to high-relevance matches rather than the potentially very long tail of low-quality matches +* Reducing the running cost of aggregations that can produce useful results using only samples e.g. `significant_terms` + +Example: + +A query on StackOverflow data for the popular term `javascript` OR the rarer term `kibana` will match many documents - most of them missing the word Kibana. To focus the `significant_terms` aggregation on top-scoring documents that are more likely to match the most interesting parts of our query we use a sample. + +$$$sampler-aggregation-example$$$ + +```console +POST /stackoverflow/_search?size=0 +{ + "query": { + "query_string": { + "query": "tags:kibana OR tags:javascript" + } + }, + "aggs": { + "sample": { + "sampler": { + "shard_size": 200 + }, + "aggs": { + "keywords": { + "significant_terms": { + "field": "tags", + "exclude": [ "kibana", "javascript" ] + } + } + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "sample": { + "doc_count": 200, <1> + "keywords": { + "doc_count": 200, + "bg_count": 650, + "buckets": [ + { + "key": "elasticsearch", + "doc_count": 150, + "score": 1.078125, + "bg_count": 200 + }, + { + "key": "logstash", + "doc_count": 50, + "score": 0.5625, + "bg_count": 50 + } + ] + } + } + } +} +``` + +1. 200 documents were sampled in total. The cost of performing the nested significant_terms aggregation was therefore limited rather than unbounded. + + +Without the `sampler` aggregation the request query considers the full "long tail" of low-quality matches and therefore identifies less significant terms such as `jquery` and `angular` rather than focusing on the more insightful Kibana-related terms. + +$$$sampler-aggregation-no-sampler-example$$$ + +```console +POST /stackoverflow/_search?size=0 +{ + "query": { + "query_string": { + "query": "tags:kibana OR tags:javascript" + } + }, + "aggs": { + "low_quality_keywords": { + "significant_terms": { + "field": "tags", + "size": 3, + "exclude": [ "kibana", "javascript" ] + } + } + } +} +``` + +Response: + +```console-result +{ + ... 
+ "aggregations": { + "low_quality_keywords": { + "doc_count": 600, + "bg_count": 650, + "buckets": [ + { + "key": "angular", + "doc_count": 200, + "score": 0.02777, + "bg_count": 200 + }, + { + "key": "jquery", + "doc_count": 200, + "score": 0.02777, + "bg_count": 200 + }, + { + "key": "logstash", + "doc_count": 50, + "score": 0.0069, + "bg_count": 50 + } + ] + } + } +} +``` + +## shard_size [_shard_size_2] + +The `shard_size` parameter limits how many top-scoring documents are collected in the sample processed on each shard. The default value is 100. + + +## Limitations [_limitations_7] + +### Cannot be nested under `breadth_first` aggregations [sampler-breadth-first-nested-agg] + +Being a quality-based filter the sampler aggregation needs access to the relevance score produced for each document. It therefore cannot be nested under a `terms` aggregation which has the `collect_mode` switched from the default `depth_first` mode to `breadth_first` as this discards scores. In this situation an error will be thrown. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-significantterms-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-significantterms-aggregation.md new file mode 100644 index 0000000000000..6a6a817d94a16 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-significantterms-aggregation.md @@ -0,0 +1,570 @@ +--- +navigation_title: "Significant terms" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-significantterms-aggregation.html +--- + +# Significant terms aggregation [search-aggregations-bucket-significantterms-aggregation] + + +An aggregation that returns interesting or unusual occurrences of terms in a set. + +* Suggesting "H5N1" when users search for "bird flu" in text +* Identifying the merchant that is the "common point of compromise" from the transaction history of credit card owners reporting loss +* Suggesting keywords relating to stock symbol $ATI for an automated news classifier +* Spotting the fraudulent doctor who is diagnosing more than their fair share of whiplash injuries +* Spotting the tire manufacturer who has a disproportionate number of blow-outs + +In all these cases the terms being selected are not simply the most popular terms in a set. They are the terms that have undergone a significant change in popularity measured between a *foreground* and *background* set. If the term "H5N1" only exists in 5 documents in a 10 million document index and yet is found in 4 of the 100 documents that make up a user’s search results that is significant and probably very relevant to their search. 5/10,000,000 vs 4/100 is a big swing in frequency. + +## Single-set analysis [_single_set_analysis] + +In the simplest case, the *foreground* set of interest is the search results matched by a query and the *background* set used for statistical comparisons is the index or indices from which the results were gathered. + +Example: + +$$$significantterms-aggregation-example$$$ + +```console +GET /_search +{ + "query": { + "terms": { "force": [ "British Transport Police" ] } + }, + "aggregations": { + "significant_crime_types": { + "significant_terms": { "field": "crime_type" } + } + } +} +``` + +Response: + +```console-result +{ + ... 
+ "aggregations": { + "significant_crime_types": { + "doc_count": 47347, + "bg_count": 5064554, + "buckets": [ + { + "key": "Bicycle theft", + "doc_count": 3640, + "score": 0.371235374214817, + "bg_count": 66799 + } + ... + ] + } + } +} +``` + +When querying an index of all crimes from all police forces, what these results show is that the British Transport Police force stand out as a force dealing with a disproportionately large number of bicycle thefts. Ordinarily, bicycle thefts represent only 1% of crimes (66799/5064554) but for the British Transport Police, who handle crime on railways and stations, 7% of crimes (3640/47347) is a bike theft. This is a significant seven-fold increase in frequency and so this anomaly was highlighted as the top crime type. + +The problem with using a query to spot anomalies is it only gives us one subset to use for comparisons. To discover all the other police forces' anomalies we would have to repeat the query for each of the different forces. + +This can be a tedious way to look for unusual patterns in an index. + + +## Multi-set analysis [_multi_set_analysis] + +A simpler way to perform analysis across multiple categories is to use a parent-level aggregation to segment the data ready for analysis. + +Example using a parent aggregation for segmentation: + +$$$significantterms-aggregation-multiset--example$$$ + +```console +GET /_search +{ + "aggregations": { + "forces": { + "terms": { "field": "force" }, + "aggregations": { + "significant_crime_types": { + "significant_terms": { "field": "crime_type" } + } + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "forces": { + "doc_count_error_upper_bound": 1375, + "sum_other_doc_count": 7879845, + "buckets": [ + { + "key": "Metropolitan Police Service", + "doc_count": 894038, + "significant_crime_types": { + "doc_count": 894038, + "bg_count": 5064554, + "buckets": [ + { + "key": "Robbery", + "doc_count": 27617, + "score": 0.0599, + "bg_count": 53182 + } + ... + ] + } + }, + { + "key": "British Transport Police", + "doc_count": 47347, + "significant_crime_types": { + "doc_count": 47347, + "bg_count": 5064554, + "buckets": [ + { + "key": "Bicycle theft", + "doc_count": 3640, + "score": 0.371, + "bg_count": 66799 + } + ... + ] + } + } + ] + } + } +} +``` + +Now we have anomaly detection for each of the police forces using a single request. + +We can use other forms of top-level aggregations to segment our data, for example segmenting by geographic area to identify unusual hot-spots of a particular crime type: + +$$$significantterms-aggregation-hotspot-example$$$ + +```console +GET /_search +{ + "aggs": { + "hotspots": { + "geohash_grid": { + "field": "location", + "precision": 5 + }, + "aggs": { + "significant_crime_types": { + "significant_terms": { "field": "crime_type" } + } + } + } + } +} +``` + +This example uses the `geohash_grid` aggregation to create result buckets that represent geographic areas, and inside each bucket we can identify anomalous levels of a crime type in these tightly-focused areas e.g. + +* Airports exhibit unusual numbers of weapon confiscations +* Universities show uplifts of bicycle thefts + +At a higher geohash_grid zoom-level with larger coverage areas we would start to see where an entire police-force may be tackling an unusual volume of a particular crime type. 
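+
+Time can be used as the segmenting dimension in the same way. A minimal sketch, assuming the crime documents carry a `@timestamp` date field (both the field name and the interval are illustrative):
+
+```console
+GET /_search
+{
+  "aggs": {
+    "trends_over_time": {
+      "date_histogram": {
+        "field": "@timestamp",
+        "calendar_interval": "month"
+      },
+      "aggs": {
+        "significant_crime_types": {
+          "significant_terms": { "field": "crime_type" }
+        }
+      }
+    }
+  }
+}
+```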
+ +Obviously a time-based top-level segmentation would help identify current trends for each point in time where a simple `terms` aggregation would typically show the very popular "constants" that persist across all time slots. + +::::{admonition} How are the scores calculated? +The numbers returned for scores are primarily intended for ranking different suggestions sensibly rather than something easily understood by end users. The scores are derived from the doc frequencies in *foreground* and *background* sets. In brief, a term is considered significant if there is a noticeable difference in the frequency in which a term appears in the subset and in the background. The way the terms are ranked can be configured, see "Parameters" section. + +:::: + + + +## Use on free-text fields [_use_on_free_text_fields] + +The significant_terms aggregation can be used effectively on tokenized free-text fields to suggest: + +* keywords for refining end-user searches +* keywords for use in percolator queries + +::::{warning} +Picking a free-text field as the subject of a significant terms analysis can be expensive! It will attempt to load every unique word into RAM. It is recommended to only use this on smaller indices. +:::: + + +::::{admonition} Use the *"like this but not this"* pattern +You can spot mis-categorized content by first searching a structured field e.g. `category:adultMovie` and use significant_terms on the free-text "movie_description" field. Take the suggested words (I’ll leave them to your imagination) and then search for all movies NOT marked as category:adultMovie but containing these keywords. You now have a ranked list of badly-categorized movies that you should reclassify or at least remove from the "familyFriendly" category. + +The significance score from each term can also provide a useful `boost` setting to sort matches. Using the `minimum_should_match` setting of the `terms` query with the keywords will help control the balance of precision/recall in the result set i.e a high setting would have a small number of relevant results packed full of keywords and a setting of "1" would produce a more exhaustive results set with all documents containing *any* keyword. + +:::: + + +::::{tip} +Free-text significant_terms are much more easily understood when viewed in context. Take the results of `significant_terms` suggestions from a free-text field and use them in a `terms` query on the same field with a `highlight` clause to present users with example snippets of documents. When the terms are presented unstemmed, highlighted, with the right case, in the right order and with some context, their significance/meaning is more readily apparent. + +:::: + + + +## Custom background sets [_custom_background_sets] + +Ordinarily, the foreground set of documents is "diffed" against a background set of all the documents in your index. However, sometimes it may prove useful to use a narrower background set as the basis for comparisons. For example, a query on documents relating to "Madrid" in an index with content from all over the world might reveal that "Spanish" was a significant term. This may be true but if you want some more focused terms you could use a `background_filter` on the term *spain* to establish a narrower set of documents as context. With this as a background "Spanish" would now be seen as commonplace and therefore not as significant as words like "capital" that relate more strongly with Madrid. 
Note that using a background filter will slow things down - each term’s background frequency must now be derived on-the-fly from filtering posting lists rather than reading the index’s pre-computed count for a term. + + +## Limitations [_limitations_8] + +### Significant terms must be indexed values [_significant_terms_must_be_indexed_values] + +Unlike the terms aggregation it is currently not possible to use script-generated terms for counting purposes. Because of the way the significant_terms aggregation must consider both *foreground* and *background* frequencies it would be prohibitively expensive to use a script on the entire index to obtain background frequencies for comparisons. Also DocValues are not supported as sources of term data for similar reasons. + + +### No analysis of floating point fields [_no_analysis_of_floating_point_fields] + +Floating point fields are currently not supported as the subject of significant_terms analysis. While integer or long fields can be used to represent concepts like bank account numbers or category numbers which can be interesting to track, floating point fields are usually used to represent quantities of something. As such, individual floating point terms are not useful for this form of frequency analysis. + + +### Use as a parent aggregation [_use_as_a_parent_aggregation] + +If there is the equivalent of a `match_all` query or no query criteria providing a subset of the index the significant_terms aggregation should not be used as the top-most aggregation - in this scenario the *foreground* set is exactly the same as the *background* set and so there is no difference in document frequencies to observe and from which to make sensible suggestions. + +Another consideration is that the significant_terms aggregation produces many candidate results at shard level that are only later pruned on the reducing node once all statistics from all shards are merged. As a result, it can be inefficient and costly in terms of RAM to embed large child aggregations under a significant_terms aggregation that later discards many candidate terms. It is advisable in these cases to perform two searches - the first to provide a rationalized list of significant_terms and then add this shortlist of terms to a second query to go back and fetch the required child aggregations. + + +### Approximate counts [_approximate_counts] + +The counts of how many documents contain a term provided in results are based on summing the samples returned from each shard and as such may be: + +* low if certain shards did not provide figures for a given term in their top sample +* high when considering the background frequency as it may count occurrences found in deleted documents + +Like most design decisions, this is the basis of a trade-off in which we have chosen to provide fast performance at the cost of some (typically small) inaccuracies. However, the `size` and `shard size` settings covered in the next section provide tools to help control the accuracy levels. + + + +## Parameters [significantterms-aggregation-parameters] + +### JLH score [_jlh_score] + +The JLH score can be used as a significance score by adding the parameter + +```js + "jlh": { + } +``` + +The scores are derived from the doc frequencies in *foreground* and *background* sets. The *absolute* change in popularity (foregroundPercent - backgroundPercent) would favor common terms whereas the *relative* change in popularity (foregroundPercent/ backgroundPercent) would favor rare terms. 
Rare vs common is essentially a precision vs recall balance and so the absolute and relative changes are multiplied to provide a sweet spot between precision and recall. + + +### Mutual information [_mutual_information] + +Mutual information as described in "Information Retrieval", Manning et al., Chapter 13.5.1 can be used as significance score by adding the parameter + +```js + "mutual_information": { + "include_negatives": true + } +``` + +Mutual information does not differentiate between terms that are descriptive for the subset or for documents outside the subset. The significant terms therefore can contain terms that appear more or less frequent in the subset than outside the subset. To filter out the terms that appear less often in the subset than in documents outside the subset, `include_negatives` can be set to `false`. + +Per default, the assumption is that the documents in the bucket are also contained in the background. If instead you defined a custom background filter that represents a different set of documents that you want to compare to, set + +```js +"background_is_superset": false +``` + + +### Chi square [_chi_square] + +Chi square as described in "Information Retrieval", Manning et al., Chapter 13.5.2 can be used as significance score by adding the parameter + +```js + "chi_square": { + } +``` + +Chi square behaves like mutual information and can be configured with the same parameters `include_negatives` and `background_is_superset`. + + +### Google normalized distance [_google_normalized_distance] + +Google normalized distance as described in ["The Google Similarity Distance", Cilibrasi and Vitanyi, 2007](https://arxiv.org/pdf/cs/0412098v3.pdf) can be used as significance score by adding the parameter + +```js + "gnd": { + } +``` + +`gnd` also accepts the `background_is_superset` parameter. + + +### p-value score [p-value-score] + +The p-value is the probability of obtaining test results at least as extreme as the results actually observed, under the assumption that the null hypothesis is correct. The p-value is calculated assuming that the foreground set and the background set are independent [Bernoulli trials](https://en.wikipedia.org/wiki/Bernoulli_trial), with the null hypothesis that the probabilities are the same. + +#### Example usage [_example_usage] + +This example calculates the p-value score for terms `user_agent.version` given the foreground set of "ended in failure" versus "NOT ended in failure". + +`"background_is_superset": false` indicates that the background set does not contain the counts of the foreground set as they are filtered out. + +`"normalize_above": 1000` facilitates returning consistent significance results at various scales. `1000` indicates that term counts greater than `1000` are scaled down by a factor of `1000/term_count`. 
+ +```console +GET /_search +{ + "query": { + "bool": { + "filter": [ + { + "term": { + "event.outcome": "failure" + } + }, + { + "range": { + "@timestamp": { + "gte": "2021-02-01", + "lt": "2021-02-04" + } + } + }, + { + "term": { + "service.name": { + "value": "frontend-node" + } + } + } + ] + } + }, + "aggs": { + "failure_p_value": { + "significant_terms": { + "field": "user_agent.version", + "background_filter": { + "bool": { + "must_not": [ + { + "term": { + "event.outcome": "failure" + } + } + ], + "filter": [ + { + "range": { + "@timestamp": { + "gte": "2021-02-01", + "lt": "2021-02-04" + } + } + }, + { + "term": { + "service.name": { + "value": "frontend-node" + } + } + } + ] + } + }, + "p_value": {"background_is_superset": false, "normalize_above": 1000} + } + } + } +} +``` + + + +### Percentage [_percentage] + +A simple calculation of the number of documents in the foreground sample with a term divided by the number of documents in the background with the term. By default this produces a score greater than zero and less than one. + +The benefit of this heuristic is that the scoring logic is simple to explain to anyone familiar with a "per capita" statistic. However, for fields with high cardinality there is a tendency for this heuristic to select the rarest terms such as typos that occur only once because they score 1/1 = 100%. + +It would be hard for a seasoned boxer to win a championship if the prize was awarded purely on the basis of percentage of fights won - by these rules a newcomer with only one fight under their belt would be impossible to beat. Multiple observations are typically required to reinforce a view so it is recommended in these cases to set both `min_doc_count` and `shard_min_doc_count` to a higher value such as 10 in order to filter out the low-frequency terms that otherwise take precedence. + +```js + "percentage": { + } +``` + + +### Which one is best? [_which_one_is_best] + +Roughly, `mutual_information` prefers high frequent terms even if they occur also frequently in the background. For example, in an analysis of natural language text this might lead to selection of stop words. `mutual_information` is unlikely to select very rare terms like misspellings. `gnd` prefers terms with a high co-occurrence and avoids selection of stopwords. It might be better suited for synonym detection. However, `gnd` has a tendency to select very rare terms that are, for example, a result of misspelling. `chi_square` and `jlh` are somewhat in-between. + +It is hard to say which one of the different heuristics will be the best choice as it depends on what the significant terms are used for (see for example [Yang and Pedersen, "A Comparative Study on Feature Selection in Text Categorization", 1997](http://courses.ischool.berkeley.edu/i256/f06/papers/yang97comparative.pdf) for a study on using significant terms for feature selection for text classification). + +If none of the above measures suits your usecase than another option is to implement a custom significance measure: + + +### Scripted [_scripted] + +Customized scores can be implemented via a script: + +```js + "script_heuristic": { + "script": { + "lang": "painless", + "source": "params._subset_freq/(params._superset_freq - params._subset_freq + 1)" + } + } +``` + +Scripts can be inline (as in above example), indexed or stored on disk. For details on the options, see [script documentation](docs-content://explore-analyze/scripting.md). 
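+
+For context, a sketch of the same custom heuristic embedded in a full request, reusing the `crime_type` field from the earlier examples:
+
+```console
+GET /_search
+{
+  "query": {
+    "terms": { "force": [ "British Transport Police" ] }
+  },
+  "aggregations": {
+    "significant_crime_types": {
+      "significant_terms": {
+        "field": "crime_type",
+        "script_heuristic": {
+          "script": {
+            "lang": "painless",
+            "source": "params._subset_freq/(params._superset_freq - params._subset_freq + 1)"
+          }
+        }
+      }
+    }
+  }
+}
+```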
+ +Available parameters in the script are + +`_subset_freq` +: Number of documents the term appears in the subset. + +`_superset_freq` +: Number of documents the term appears in the superset. + +`_subset_size` +: Number of documents in the subset. + +`_superset_size` +: Number of documents in the superset. + + +### Size & Shard Size [sig-terms-shard-size] + +The `size` parameter can be set to define how many term buckets should be returned out of the overall terms list. By default, the node coordinating the search process will request each shard to provide its own top term buckets and once all shards respond, it will reduce the results to the final list that will then be returned to the client. If the number of unique terms is greater than `size`, the returned list can be slightly off and not accurate (it could be that the term counts are slightly off and it could even be that a term that should have been in the top size buckets was not returned). + +To ensure better accuracy a multiple of the final `size` is used as the number of terms to request from each shard (`2 * (size * 1.5 + 10)`). To take manual control of this setting the `shard_size` parameter can be used to control the volumes of candidate terms produced by each shard. + +Low-frequency terms can turn out to be the most interesting ones once all results are combined so the significant_terms aggregation can produce higher-quality results when the `shard_size` parameter is set to values significantly higher than the `size` setting. This ensures that a bigger volume of promising candidate terms are given a consolidated review by the reducing node before the final selection. Obviously large candidate term lists will cause extra network traffic and RAM usage so this is quality/cost trade off that needs to be balanced. If `shard_size` is set to -1 (the default) then `shard_size` will be automatically estimated based on the number of shards and the `size` parameter. + +::::{note} +`shard_size` cannot be smaller than `size` (as it doesn’t make much sense). When it is, Elasticsearch will override it and reset it to be equal to `size`. +:::: + + + +### Minimum document count [_minimum_document_count_2] + +It is possible to only return terms that match more than a configured number of hits using the `min_doc_count` option: + +$$$significantterms-aggregation-min-document-example$$$ + +```console +GET /_search +{ + "aggs": { + "tags": { + "significant_terms": { + "field": "tag", + "min_doc_count": 10 + } + } + } +} +``` + +The above aggregation would only return tags which have been found in 10 hits or more. Default value is `3`. + +Terms that score highly will be collected on a shard level and merged with the terms collected from other shards in a second step. However, the shard does not have the information about the global term frequencies available. The decision if a term is added to a candidate list depends only on the score computed on the shard using local shard frequencies, not the global frequencies of the word. The `min_doc_count` criterion is only applied after merging local terms statistics of all shards. In a way the decision to add the term as a candidate is made without being very *certain* about if the term will actually reach the required `min_doc_count`. This might cause many (globally) high frequent terms to be missing in the final result if low frequent but high scoring terms populated the candidate lists. To avoid this, the `shard_size` parameter can be increased to allow more candidate terms on the shards. 
However, this increases memory consumption and network traffic. + + +### `shard_min_doc_count` [search-aggregations-bucket-significantterms-shard-min-doc-count] + +The parameter `shard_min_doc_count` regulates the *certainty* a shard has if the term should actually be added to the candidate list or not with respect to the `min_doc_count`. Terms will only be considered if their local shard frequency within the set is higher than the `shard_min_doc_count`. If your dictionary contains many low frequent terms and you are not interested in those (for example misspellings), then you can set the `shard_min_doc_count` parameter to filter out candidate terms on a shard level that will with a reasonable certainty not reach the required `min_doc_count` even after merging the local counts. `shard_min_doc_count` is set to `0` per default and has no effect unless you explicitly set it. + +::::{warning} +Setting `min_doc_count` to `1` is generally not advised as it tends to return terms that are typos or other bizarre curiosities. Finding more than one instance of a term helps reinforce that, while still rare, the term was not the result of a one-off accident. The default value of 3 is used to provide a minimum weight-of-evidence. Setting `shard_min_doc_count` too high will cause significant candidate terms to be filtered out on a shard level. This value should be set much lower than `min_doc_count/#shards`. +:::: + + + +### Custom background context [_custom_background_context] + +The default source of statistical information for background term frequencies is the entire index and this scope can be narrowed through the use of a `background_filter` to focus in on significant terms within a narrower context: + +$$$significantterms-aggregation-custom-background-example$$$ + +```console +GET /_search +{ + "query": { + "match": { + "city": "madrid" + } + }, + "aggs": { + "tags": { + "significant_terms": { + "field": "tag", + "background_filter": { + "term": { "text": "spain" } + } + } + } + } +} +``` + +The above filter would help focus in on terms that were peculiar to the city of Madrid rather than revealing terms like "Spanish" that are unusual in the full index’s worldwide context but commonplace in the subset of documents containing the word "Spain". + +::::{warning} +Use of background filters will slow the query as each term’s postings must be filtered to determine a frequency +:::: + + + +### Filtering Values [_filtering_values_2] + +It is possible (although rarely required) to filter the values for which buckets will be created. This can be done using the `include` and `exclude` parameters which are based on a regular expression string or arrays of exact terms. This functionality mirrors the features described in the [terms aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md) documentation. + + + +## Collect mode [_collect_mode] + +To avoid memory issues, the `significant_terms` aggregation always computes child aggregations in `breadth_first` mode. A description of the different collection modes can be found in the [terms aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md#search-aggregations-bucket-terms-aggregation-collect) documentation. 
+ + +## Execution hint [_execution_hint_2] + +There are different mechanisms by which terms aggregations can be executed: + +* by using field values directly in order to aggregate data per-bucket (`map`) +* by using [global ordinals](/reference/elasticsearch/mapping-reference/eager-global-ordinals.md) of the field and allocating one bucket per global ordinal (`global_ordinals`) + +Elasticsearch tries to have sensible defaults so this is something that generally doesn’t need to be configured. + +`global_ordinals` is the default option for `keyword` field, it uses global ordinals to allocates buckets dynamically so memory usage is linear to the number of values of the documents that are part of the aggregation scope. + +`map` should only be considered when very few documents match a query. Otherwise the ordinals-based execution mode is significantly faster. By default, `map` is only used when running an aggregation on scripts, since they don’t have ordinals. + +$$$significantterms-aggregation-execution-hint-example$$$ + +```console +GET /_search +{ + "aggs": { + "tags": { + "significant_terms": { + "field": "tags", + "execution_hint": "map" <1> + } + } + } +} +``` + +1. the possible values are `map`, `global_ordinals` + + +Please note that Elasticsearch will ignore this execution hint if it is not applicable. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-significanttext-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-significanttext-aggregation.md new file mode 100644 index 0000000000000..845e68d95501e --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-significanttext-aggregation.md @@ -0,0 +1,409 @@ +--- +navigation_title: "Significant text" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-significanttext-aggregation.html +--- + +# Significant text aggregation [search-aggregations-bucket-significanttext-aggregation] + + +An aggregation that returns interesting or unusual occurrences of free-text terms in a set. It is like the [significant terms](/reference/data-analysis/aggregations/search-aggregations-bucket-significantterms-aggregation.md) aggregation but differs in that: + +* It is specifically designed for use on type `text` fields +* It does not require field data or doc-values +* It re-analyzes text content on-the-fly meaning it can also filter duplicate sections of noisy text that otherwise tend to skew statistics. + +::::{warning} +Re-analyzing *large* result sets will require a lot of time and memory. It is recommended that the significant_text aggregation is used as a child of either the [sampler](/reference/data-analysis/aggregations/search-aggregations-bucket-sampler-aggregation.md) or [diversified sampler](/reference/data-analysis/aggregations/search-aggregations-bucket-diversified-sampler-aggregation.md) aggregation to limit the analysis to a *small* selection of top-matching documents e.g. 200. This will typically improve speed, memory use and quality of results. +:::: + + +* Suggesting "H5N1" when users search for "bird flu" to help expand queries +* Suggesting keywords relating to stock symbol $ATI for use in an automated news classifier + +In these cases the words being selected are not simply the most popular terms in results. The most popular words tend to be very boring (*and, of, the, we, I, they* …​). 
The significant words are the ones that have undergone a significant change in popularity measured between a *foreground* and *background* set. If the term "H5N1" only exists in 5 documents in a 10 million document index and yet is found in 4 of the 100 documents that make up a user’s search results that is significant and probably very relevant to their search. 5/10,000,000 vs 4/100 is a big swing in frequency. + +## Basic use [_basic_use_2] + +In the typical use case, the *foreground* set of interest is a selection of the top-matching search results for a query and the *background* set used for statistical comparisons is the index or indices from which the results were gathered. + +Example: + +$$$significanttext-aggregation-example$$$ + +```console +GET news/_search +{ + "query": { + "match": { "content": "Bird flu" } + }, + "aggregations": { + "my_sample": { + "sampler": { + "shard_size": 100 + }, + "aggregations": { + "keywords": { + "significant_text": { "field": "content" } + } + } + } + } +} +``` + +Response: + +```console-result +{ + "took": 9, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations" : { + "my_sample": { + "doc_count": 100, + "keywords" : { + "doc_count": 100, + "buckets" : [ + { + "key": "h5n1", + "doc_count": 4, + "score": 4.71235374214817, + "bg_count": 5 + } + ... + ] + } + } + } +} +``` + +The results show that "h5n1" is one of several terms strongly associated with bird flu. It only occurs 5 times in our index as a whole (see the `bg_count`) and yet 4 of these were lucky enough to appear in our 100 document sample of "bird flu" results. That suggests a significant word and one which the user can potentially add to their search. + + +## Dealing with noisy data using `filter_duplicate_text` [filter-duplicate-text-noisy-data] + +Free-text fields often contain a mix of original content and mechanical copies of text (cut-and-paste biographies, email reply chains, retweets, boilerplate headers/footers, page navigation menus, sidebar news links, copyright notices, standard disclaimers, addresses). + +In real-world data these duplicate sections of text tend to feature heavily in `significant_text` results if they aren’t filtered out. Filtering near-duplicate text is a difficult task at index-time but we can cleanse the data on-the-fly at query time using the `filter_duplicate_text` setting. + +First let’s look at an unfiltered real-world example using the [Signal media dataset](https://research.signalmedia.co/newsir16/signal-dataset.md) of a million news articles covering a wide variety of news. Here are the raw significant text results for a search for the articles mentioning "elasticsearch": + +```js +{ + ... + "aggregations": { + "sample": { + "doc_count": 35, + "keywords": { + "doc_count": 35, + "buckets": [ + { + "key": "elasticsearch", + "doc_count": 35, + "score": 28570.428571428572, + "bg_count": 35 + }, + ... + { + "key": "currensee", + "doc_count": 8, + "score": 6530.383673469388, + "bg_count": 8 + }, + ... + { + "key": "pozmantier", + "doc_count": 4, + "score": 3265.191836734694, + "bg_count": 4 + }, + ... + +} +``` + +The uncleansed documents have thrown up some odd-looking terms that are, on the face of it, statistically correlated with appearances of our search term "elasticsearch" e.g. "pozmantier". 
We can drill down into examples of these documents to see why pozmantier is connected using this query: + +$$$significanttext-aggregation-pozmantier-example$$$ + +```console +GET news/_search +{ + "query": { + "simple_query_string": { + "query": "+elasticsearch +pozmantier" + } + }, + "_source": [ + "title", + "source" + ], + "highlight": { + "fields": { + "content": {} + } + } +} +``` + +The results show a series of very similar news articles about a judging panel for a number of tech projects: + +```js +{ + ... + "hits": { + "hits": [ + { + ... + "_source": { + "source": "Presentation Master", + "title": "T.E.N. Announces Nominees for the 2015 ISE® North America Awards" + }, + "highlight": { + "content": [ + "City of San Diego Mike Pozmantier, Program Manager, Cyber Security Division, Department of", + " Janus, Janus ElasticSearch Security Visualization Engine " + ] + } + }, + { + ... + "_source": { + "source": "RCL Advisors", + "title": "T.E.N. Announces Nominees for the 2015 ISE(R) North America Awards" + }, + "highlight": { + "content": [ + "Mike Pozmantier, Program Manager, Cyber Security Division, Department of Homeland Security S&T", + "Janus, Janus ElasticSearch Security Visualization Engine" + ] + } + }, + ... +``` + +Mike Pozmantier was one of many judges on a panel and elasticsearch was used in one of many projects being judged. + +As is typical, this lengthy press release was cut-and-paste by a variety of news sites and consequently any rare names, numbers or typos they contain become statistically correlated with our matching query. + +Fortunately similar documents tend to rank similarly so as part of examining the stream of top-matching documents the significant_text aggregation can apply a filter to remove sequences of any 6 or more tokens that have already been seen. Let’s try this same query now but with the `filter_duplicate_text` setting turned on: + +$$$significanttext-aggregation-filter-duplicate-text-example$$$ + +```console +GET news/_search +{ + "query": { + "match": { + "content": "elasticsearch" + } + }, + "aggs": { + "sample": { + "sampler": { + "shard_size": 100 + }, + "aggs": { + "keywords": { + "significant_text": { + "field": "content", + "filter_duplicate_text": true + } + } + } + } + } +} +``` + +The results from analysing our deduplicated text are obviously of higher quality to anyone familiar with the elastic stack: + +```js +{ + ... + "aggregations": { + "sample": { + "doc_count": 35, + "keywords": { + "doc_count": 35, + "buckets": [ + { + "key": "elasticsearch", + "doc_count": 22, + "score": 11288.001166180758, + "bg_count": 35 + }, + { + "key": "logstash", + "doc_count": 3, + "score": 1836.648979591837, + "bg_count": 4 + }, + { + "key": "kibana", + "doc_count": 3, + "score": 1469.3020408163263, + "bg_count": 5 + } + ] + } + } + } +} +``` + +Mr Pozmantier and other one-off associations with elasticsearch no longer appear in the aggregation results as a consequence of copy-and-paste operations or other forms of mechanical repetition. + +If your duplicate or near-duplicate content is identifiable via a single-value indexed field (perhaps a hash of the article’s `title` text or an `original_press_release_url` field) then it would be more efficient to use a parent [diversified sampler](/reference/data-analysis/aggregations/search-aggregations-bucket-diversified-sampler-aggregation.md) aggregation to eliminate these documents from the sample set based on that single key. 
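As a minimal sketch of that approach (assuming a hypothetical single-valued `title_hash` keyword field that identifies copies of the same press release), a parent `diversified_sampler` can cap the sample at one document per hash before the keywords are analyzed:

```console
GET news/_search
{
  "query": {
    "match": { "content": "elasticsearch" }
  },
  "aggs": {
    "dedup_sample": {
      "diversified_sampler": {
        "shard_size": 100,
        "field": "title_hash",
        "max_docs_per_value": 1
      },
      "aggs": {
        "keywords": {
          "significant_text": { "field": "content" }
        }
      }
    }
  }
}
```

Because at most one document per `title_hash` value enters the sample, mechanically repeated articles contribute only a single vote to the keyword statistics.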
The less duplicate content you can feed into the significant_text aggregation up front the better in terms of performance. + +::::{admonition} How are the significance scores calculated? +The numbers returned for scores are primarily intended for ranking different suggestions sensibly rather than something easily understood by end users. The scores are derived from the doc frequencies in *foreground* and *background* sets. In brief, a term is considered significant if there is a noticeable difference in the frequency in which a term appears in the subset and in the background. The way the terms are ranked can be configured, see "Parameters" section. + +:::: + + +::::{admonition} Use the *"like this but not this"* pattern +You can spot mis-categorized content by first searching a structured field e.g. `category:adultMovie` and use significant_text on the text "movie_description" field. Take the suggested words (I’ll leave them to your imagination) and then search for all movies NOT marked as category:adultMovie but containing these keywords. You now have a ranked list of badly-categorized movies that you should reclassify or at least remove from the "familyFriendly" category. + +The significance score from each term can also provide a useful `boost` setting to sort matches. Using the `minimum_should_match` setting of the `terms` query with the keywords will help control the balance of precision/recall in the result set i.e a high setting would have a small number of relevant results packed full of keywords and a setting of "1" would produce a more exhaustive results set with all documents containing *any* keyword. + +:::: + + + +## Limitations [_limitations_9] + +### No support for child aggregations [_no_support_for_child_aggregations] + +The significant_text aggregation intentionally does not support the addition of child aggregations because: + +* It would come with a high memory cost +* It isn’t a generally useful feature and there is a workaround for those that need it + +The volume of candidate terms is generally very high and these are pruned heavily before the final results are returned. Supporting child aggregations would generate additional churn and be inefficient. Clients can always take the heavily-trimmed set of results from a `significant_text` request and make a subsequent follow-up query using a `terms` aggregation with an `include` clause and child aggregations to perform further analysis of selected keywords in a more efficient fashion. + + +### No support for nested objects [_no_support_for_nested_objects] + +The significant_text aggregation currently also cannot be used with text fields in nested objects, because it works with the document JSON source. This makes this feature inefficient when matching nested docs from stored JSON given a matching Lucene docID. + + +### Approximate counts [_approximate_counts_2] + +The counts of how many documents contain a term provided in results are based on summing the samples returned from each shard and as such may be: + +* low if certain shards did not provide figures for a given term in their top sample +* high when considering the background frequency as it may count occurrences found in deleted documents + +Like most design decisions, this is the basis of a trade-off in which we have chosen to provide fast performance at the cost of some (typically small) inaccuracies. However, the `size` and `shard size` settings covered in the next section provide tools to help control the accuracy levels. 
+ + + +## Parameters [significanttext-aggregation-parameters] + +### Significance heuristics [_significance_heuristics] + +This aggregation supports the same scoring heuristics (JLH, mutual_information, gnd, chi_square etc) as the [significant terms](/reference/data-analysis/aggregations/search-aggregations-bucket-significantterms-aggregation.md) aggregation + + +### Size & Shard Size [sig-text-shard-size] + +The `size` parameter can be set to define how many term buckets should be returned out of the overall terms list. By default, the node coordinating the search process will request each shard to provide its own top term buckets and once all shards respond, it will reduce the results to the final list that will then be returned to the client. If the number of unique terms is greater than `size`, the returned list can be slightly off and not accurate (it could be that the term counts are slightly off and it could even be that a term that should have been in the top size buckets was not returned). + +To ensure better accuracy a multiple of the final `size` is used as the number of terms to request from each shard (`2 * (size * 1.5 + 10)`). To take manual control of this setting the `shard_size` parameter can be used to control the volumes of candidate terms produced by each shard. + +Low-frequency terms can turn out to be the most interesting ones once all results are combined so the significant_terms aggregation can produce higher-quality results when the `shard_size` parameter is set to values significantly higher than the `size` setting. This ensures that a bigger volume of promising candidate terms are given a consolidated review by the reducing node before the final selection. Obviously large candidate term lists will cause extra network traffic and RAM usage so this is quality/cost trade off that needs to be balanced. If `shard_size` is set to -1 (the default) then `shard_size` will be automatically estimated based on the number of shards and the `size` parameter. + +::::{note} +`shard_size` cannot be smaller than `size` (as it doesn’t make much sense). When it is, elasticsearch will override it and reset it to be equal to `size`. +:::: + + + +### Minimum document count [_minimum_document_count_3] + +It is possible to only return terms that match more than a configured number of hits using the `min_doc_count` option. The Default value is 3. + +Terms that score highly will be collected on a shard level and merged with the terms collected from other shards in a second step. However, the shard does not have the information about the global term frequencies available. The decision if a term is added to a candidate list depends only on the score computed on the shard using local shard frequencies, not the global frequencies of the word. The `min_doc_count` criterion is only applied after merging local terms statistics of all shards. In a way the decision to add the term as a candidate is made without being very *certain* about if the term will actually reach the required `min_doc_count`. This might cause many (globally) high frequent terms to be missing in the final result if low frequent but high scoring terms populated the candidate lists. To avoid this, the `shard_size` parameter can be increased to allow more candidate terms on the shards. However, this increases memory consumption and network traffic. 
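For example, the following request (a minimal sketch reusing the news data from the earlier examples) raises `min_doc_count` while also giving each shard a larger candidate list, so that globally frequent terms are less likely to be dropped before the merge:

```console
GET news/_search
{
  "query": { "match": { "content": "bird flu" } },
  "aggs": {
    "keywords": {
      "significant_text": {
        "field": "content",
        "min_doc_count": 5,
        "shard_size": 200
      }
    }
  }
}
```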
+ +#### `shard_min_doc_count` [search-aggregations-bucket-significanttext-shard-min-doc-count] + +The parameter `shard_min_doc_count` regulates the *certainty* a shard has if the term should actually be added to the candidate list or not with respect to the `min_doc_count`. Terms will only be considered if their local shard frequency within the set is higher than the `shard_min_doc_count`. If your dictionary contains many low frequent terms and you are not interested in those (for example misspellings), then you can set the `shard_min_doc_count` parameter to filter out candidate terms on a shard level that will with a reasonable certainty not reach the required `min_doc_count` even after merging the local counts. `shard_min_doc_count` is set to `0` per default and has no effect unless you explicitly set it. + +::::{warning} +Setting `min_doc_count` to `1` is generally not advised as it tends to return terms that are typos or other bizarre curiosities. Finding more than one instance of a term helps reinforce that, while still rare, the term was not the result of a one-off accident. The default value of 3 is used to provide a minimum weight-of-evidence. Setting `shard_min_doc_count` too high will cause significant candidate terms to be filtered out on a shard level. This value should be set much lower than `min_doc_count/#shards`. +:::: + + + + +### Custom background context [_custom_background_context_2] + +The default source of statistical information for background term frequencies is the entire index and this scope can be narrowed through the use of a `background_filter` to focus in on significant terms within a narrower context: + +$$$significanttext-aggregation-custom-background-example$$$ + +```console +GET news/_search +{ + "query": { + "match": { + "content": "madrid" + } + }, + "aggs": { + "tags": { + "significant_text": { + "field": "content", + "background_filter": { + "term": { "content": "spain" } + } + } + } + } +} +``` + +The above filter would help focus in on terms that were peculiar to the city of Madrid rather than revealing terms like "Spanish" that are unusual in the full index’s worldwide context but commonplace in the subset of documents containing the word "Spain". + +::::{warning} +Use of background filters will slow the query as each term’s postings must be filtered to determine a frequency +:::: + + + +### Dealing with source and index mappings [_dealing_with_source_and_index_mappings] + +Ordinarily the indexed field name and the original JSON field being retrieved share the same name. However with more complex field mappings using features like `copy_to` the source JSON field(s) and the indexed field being aggregated can differ. In these cases it is possible to list the JSON _source fields from which text will be analyzed using the `source_fields` parameter: + +$$$significanttext-aggregation-mappings-example$$$ + +```console +GET news/_search +{ + "query": { + "match": { + "custom_all": "elasticsearch" + } + }, + "aggs": { + "tags": { + "significant_text": { + "field": "custom_all", + "source_fields": [ "content", "title" ] + } + } + } +} +``` + + +### Filtering Values [_filtering_values_3] + +It is possible (although rarely required) to filter the values for which buckets will be created. This can be done using the `include` and `exclude` parameters which are based on a regular expression string or arrays of exact terms. 
This functionality mirrors the features described in the [terms aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md) documentation. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md new file mode 100644 index 0000000000000..c2112aafca0c2 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md @@ -0,0 +1,718 @@ +--- +navigation_title: "Terms" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html +--- + +# Terms aggregation [search-aggregations-bucket-terms-aggregation] + + +A multi-bucket value source based aggregation where buckets are dynamically built - one per unique value. + +Example: + +$$$terms-aggregation-example$$$ + +```console +GET /_search +{ + "aggs": { + "genres": { + "terms": { "field": "genre" } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "genres": { + "doc_count_error_upper_bound": 0, <1> + "sum_other_doc_count": 0, <2> + "buckets": [ <3> + { + "key": "electronic", + "doc_count": 6 + }, + { + "key": "rock", + "doc_count": 3 + }, + { + "key": "jazz", + "doc_count": 2 + } + ] + } + } +} +``` + +1. an upper bound of the error on the document counts for each term, see [below](#terms-agg-doc-count-error) +2. when there are lots of unique terms, Elasticsearch only returns the top terms; this number is the sum of the document counts for all buckets that are not part of the response +3. the list of the top buckets, the meaning of `top` being defined by the [order](#search-aggregations-bucket-terms-aggregation-order) + + +$$$search-aggregations-bucket-terms-aggregation-types$$$ +The `field` can be [Keyword](/reference/elasticsearch/mapping-reference/keyword.md), [Numeric](/reference/elasticsearch/mapping-reference/number.md), [`ip`](/reference/elasticsearch/mapping-reference/ip.md), [`boolean`](/reference/elasticsearch/mapping-reference/boolean.md), or [`binary`](/reference/elasticsearch/mapping-reference/binary.md). + +::::{note} +By default, you cannot run a `terms` aggregation on a `text` field. Use a `keyword` [sub-field](/reference/elasticsearch/mapping-reference/multi-fields.md) instead. Alternatively, you can enable [`fielddata`](/reference/elasticsearch/mapping-reference/text.md#fielddata-mapping-param) on the `text` field to create buckets for the field’s [analyzed](docs-content://manage-data/data-store/text-analysis.md) terms. Enabling `fielddata` can significantly increase memory usage. +:::: + + +## Size [search-aggregations-bucket-terms-aggregation-size] + +By default, the `terms` aggregation returns the top ten terms with the most documents. Use the `size` parameter to return more terms, up to the [search.max_buckets](/reference/elasticsearch/configuration-reference/search-settings.md#search-settings-max-buckets) limit. + +If your data contains 100 or 1000 unique terms, you can increase the `size` of the `terms` aggregation to return them all. If you have more unique terms and you need them all, use the [composite aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-composite-aggregation.md) instead. + +Larger values of `size` use more memory to compute and, push the whole aggregation close to the `max_buckets` limit. 
You’ll know you’ve gone too large if the request fails with a message about `max_buckets`.


## Shard size [search-aggregations-bucket-terms-aggregation-shard-size]

To get more accurate results, the `terms` agg fetches more than the top `size` terms from each shard. It fetches the top `shard_size` terms, which defaults to `size * 1.5 + 10`.

This is to handle the case when one term has many documents on one shard but is just below the `size` threshold on all other shards. If each shard only returned `size` terms, the aggregation would return a partial doc count for the term. So `terms` returns more terms in an attempt to catch the missing terms. This helps, but it’s still quite possible to return a partial doc count for a term. It just takes a term with more disparate per-shard doc counts.

You can increase `shard_size` to better account for these disparate doc counts and improve the accuracy of the selection of top terms. It is much cheaper to increase the `shard_size` than to increase the `size`. However, it still means more bytes over the wire and more data held in memory on the coordinating node.

::::{important}
This guidance only applies if you’re using the `terms` aggregation’s default sort `order`. If you’re sorting by anything other than document count in descending order, see [Order](#search-aggregations-bucket-terms-aggregation-order).
::::


::::{note}
`shard_size` cannot be smaller than `size` (as it doesn’t make much sense). When it is, Elasticsearch will override it and reset it to be equal to `size`.
::::


## Document count error [terms-agg-doc-count-error]

Even with a larger `shard_size` value, `doc_count` values for a `terms` aggregation may be approximate. As a result, any sub-aggregations on the `terms` aggregation may also be approximate.

`sum_other_doc_count` is the number of documents that didn’t make it into the top `size` terms. If this is greater than `0`, you can be sure that the `terms` agg had to throw away some buckets, either because they didn’t fit into `size` on the coordinating node or they didn’t fit into `shard_size` on the data node.


## Per bucket document count error [_per_bucket_document_count_error]

If you set the `show_term_doc_count_error` parameter to `true`, the `terms` aggregation will include `doc_count_error_upper_bound`, which is an upper bound to the error on the `doc_count` returned by each shard. It’s the sum of the size of the largest bucket on each shard that didn’t fit into `shard_size`.

In more concrete terms, imagine there is one bucket that is very large on one shard and just outside the `shard_size` on all the other shards. In that case, the `terms` agg will return the bucket because it is large, but it’ll be missing data from many documents on the shards where the term fell below the `shard_size` threshold. `doc_count_error_upper_bound` is the maximum number of those missing documents.

$$$terms-aggregation-doc-count-error-example$$$

```console
GET /_search
{
  "aggs": {
    "products": {
      "terms": {
        "field": "product",
        "size": 5,
        "show_term_doc_count_error": true
      }
    }
  }
}
```

These errors can only be calculated in this way when the terms are ordered by descending document count. When the aggregation is ordered by the terms values themselves (either ascending or descending) there is no error in the document count since if a shard does not return a particular term which appears in the results from another shard, it must not have that term in its index.
When the aggregation is either sorted by a sub aggregation or in order of ascending document count, the error in the document counts cannot be determined and is given a value of -1 to indicate this. + + +## Order [search-aggregations-bucket-terms-aggregation-order] + +By default, the `terms` aggregation orders terms by descending document `_count`. This produces a bounded [document count](#terms-agg-doc-count-error) error that {{es}} can report. + +You can use the `order` parameter to specify a different sort order, but we don’t recommend it. It is extremely easy to create a terms ordering that will just return wrong results, and not obvious to see when you have done so. Change this only with caution. + +::::{warning} +Especially avoid using `"order": { "_count": "asc" }`. If you need to find rare terms, use the [`rare_terms`](/reference/data-analysis/aggregations/search-aggregations-bucket-rare-terms-aggregation.md) aggregation instead. Due to the way the `terms` aggregation [gets terms from shards](#search-aggregations-bucket-terms-aggregation-shard-size), sorting by ascending doc count often produces inaccurate results. +:::: + + +### Ordering by the term value [_ordering_by_the_term_value] + +In this case, the buckets are ordered by the actual term values, such as lexicographic order for keywords or numerically for numbers. This sorting is safe in both ascending and descending directions, and produces accurate results. + +Example of ordering the buckets alphabetically by their terms in an ascending manner: + +$$$terms-aggregation-asc-example$$$ + +```console +GET /_search +{ + "aggs": { + "genres": { + "terms": { + "field": "genre", + "order": { "_key": "asc" } + } + } + } +} +``` + + +### Ordering by a sub aggregation [_ordering_by_a_sub_aggregation] + +::::{warning} +Sorting by a sub aggregation generally produces incorrect ordering, due to the way the `terms` aggregation [gets results from shards](#search-aggregations-bucket-terms-aggregation-shard-size). +:::: + + +There are two cases when sub-aggregation ordering is safe and returns correct results: sorting by a maximum in descending order, or sorting by a minimum in ascending order. These approaches work because they align with the behavior of sub aggregations. That is, if you’re looking for the largest maximum or the smallest minimum, the global answer (from combined shards) must be included in one of the local shard answers. Conversely, the smallest maximum and largest minimum wouldn’t be accurately computed. + +Note also that in these cases, the ordering is correct but the doc counts and non-ordering sub aggregations may still have errors (and {{es}} does not calculate a bound for those errors). 
Ordering the buckets by a single value metrics sub-aggregation (identified by the aggregation name):

$$$terms-aggregation-subaggregation-example$$$

```console
GET /_search
{
  "aggs": {
    "genres": {
      "terms": {
        "field": "genre",
        "order": { "max_play_count": "desc" }
      },
      "aggs": {
        "max_play_count": { "max": { "field": "play_count" } }
      }
    }
  }
}
```

Ordering the buckets by a multi value metrics sub-aggregation (identified by the aggregation name):

$$$terms-aggregation-multivalue-subaggregation-example$$$

```console
GET /_search
{
  "aggs": {
    "genres": {
      "terms": {
        "field": "genre",
        "order": { "playback_stats.max": "desc" }
      },
      "aggs": {
        "playback_stats": { "stats": { "field": "play_count" } }
      }
    }
  }
}
```

::::{admonition} Pipeline aggs cannot be used for sorting
:class: note

[Pipeline aggregations](/reference/data-analysis/aggregations/pipeline.md) are run during the reduce phase after all other aggregations have already completed. For this reason, they cannot be used for ordering.

::::


It is also possible to order the buckets based on a "deeper" aggregation in the hierarchy. This is supported as long as the aggregations in the path are of a single-bucket type, where the last aggregation in the path may be either a single-bucket one or a metrics one. If it’s a single-bucket type, the order is defined by the number of docs in the bucket (i.e. `doc_count`). If it’s a metrics one, the same rules as above apply (the path must indicate the metric name to sort by in the case of a multi-value metrics aggregation, and in the case of a single-value metrics aggregation the sort is applied to that value).

The path must be defined in the following form:

```ebnf
AGG_SEPARATOR    = '>' ;
METRIC_SEPARATOR = '.' ;
AGG_NAME         = <the name of the aggregation> ;
METRIC           = <the name of the metric (in case of multi-value metrics aggregation)> ;
PATH             = <AGG_NAME> [ <AGG_SEPARATOR>, <AGG_NAME> ]* [ <METRIC_SEPARATOR>, <METRIC> ] ;
```

$$$terms-aggregation-hierarchy-example$$$

```console
GET /_search
{
  "aggs": {
    "countries": {
      "terms": {
        "field": "artist.country",
        "order": { "rock>playback_stats.avg": "desc" }
      },
      "aggs": {
        "rock": {
          "filter": { "term": { "genre": "rock" } },
          "aggs": {
            "playback_stats": { "stats": { "field": "play_count" } }
          }
        }
      }
    }
  }
}
```

The above will sort the artist’s countries buckets based on the average play count among the rock songs.

Multiple criteria can be used to order the buckets by providing an array of order criteria such as the following:

$$$terms-aggregation-multicriteria-example$$$

```console
GET /_search
{
  "aggs": {
    "countries": {
      "terms": {
        "field": "artist.country",
        "order": [ { "rock>playback_stats.avg": "desc" }, { "_count": "desc" } ]
      },
      "aggs": {
        "rock": {
          "filter": { "term": { "genre": "rock" } },
          "aggs": {
            "playback_stats": { "stats": { "field": "play_count" } }
          }
        }
      }
    }
  }
}
```

The above will sort the artist’s countries buckets based on the average play count among the rock songs and then by their `doc_count` in descending order.

::::{note}
In the event that two buckets share the same values for all order criteria the bucket’s term value is used as a tie-breaker in ascending alphabetical order to prevent non-deterministic ordering of buckets.
::::


### Ordering by count ascending [_ordering_by_count_ascending]

Ordering terms by ascending document `_count` produces an unbounded error that {{es}} can’t accurately report.
We therefore strongly recommend against using `"order": { "_count": "asc" }` as shown in the following example: + +$$$terms-aggregation-count-example$$$ + +```console +GET /_search +{ + "aggs": { + "genres": { + "terms": { + "field": "genre", + "order": { "_count": "asc" } + } + } + } +} +``` + + + +## Minimum document count [_minimum_document_count_4] + +It is possible to only return terms that match more than a configured number of hits using the `min_doc_count` option: + +$$$terms-aggregation-min-doc-count-example$$$ + +```console +GET /_search +{ + "aggs": { + "tags": { + "terms": { + "field": "tags", + "min_doc_count": 10 + } + } + } +} +``` + +The above aggregation would only return tags which have been found in 10 hits or more. Default value is `1`. + +Terms are collected and ordered on a shard level and merged with the terms collected from other shards in a second step. However, the shard does not have the information about the global document count available. The decision if a term is added to a candidate list depends only on the order computed on the shard using local shard frequencies. The `min_doc_count` criterion is only applied after merging local terms statistics of all shards. In a way the decision to add the term as a candidate is made without being very *certain* about if the term will actually reach the required `min_doc_count`. This might cause many (globally) high frequent terms to be missing in the final result if low frequent terms populated the candidate lists. To avoid this, the `shard_size` parameter can be increased to allow more candidate terms on the shards. However, this increases memory consumption and network traffic. + +### `shard_min_doc_count` [search-aggregations-bucket-terms-shard-min-doc-count] + +The parameter `shard_min_doc_count` regulates the *certainty* a shard has if the term should actually be added to the candidate list or not with respect to the `min_doc_count`. Terms will only be considered if their local shard frequency within the set is higher than the `shard_min_doc_count`. If your dictionary contains many low frequent terms and you are not interested in those (for example misspellings), then you can set the `shard_min_doc_count` parameter to filter out candidate terms on a shard level that will with a reasonable certainty not reach the required `min_doc_count` even after merging the local counts. `shard_min_doc_count` is set to `0` per default and has no effect unless you explicitly set it. + +::::{note} +Setting `min_doc_count`=`0` will also return buckets for terms that didn’t match any hit. However, some of the returned terms which have a document count of zero might only belong to deleted documents or documents from other types, so there is no warranty that a `match_all` query would find a positive document count for those terms. +:::: + + +::::{warning} +When NOT sorting on `doc_count` descending, high values of `min_doc_count` may return a number of buckets which is less than `size` because not enough data was gathered from the shards. Missing buckets can be back by increasing `shard_size`. Setting `shard_min_doc_count` too high will cause terms to be filtered out on a shard level. This value should be set much lower than `min_doc_count/#shards`. +:::: + + + + +## Script [search-aggregations-bucket-terms-aggregation-script] + +Use a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md) if the data in your documents doesn’t exactly match what you’d like to aggregate. 
If, for example, "anthologies" need to be in a special category then you could run this: + +$$$terms-aggregation-script-example$$$ + +```console +GET /_search +{ + "size": 0, + "runtime_mappings": { + "normalized_genre": { + "type": "keyword", + "script": """ + String genre = doc['genre'].value; + if (doc['product'].value.startsWith('Anthology')) { + emit(genre + ' anthology'); + } else { + emit(genre); + } + """ + } + }, + "aggs": { + "genres": { + "terms": { + "field": "normalized_genre" + } + } + } +} +``` + +Which will look like: + +```console-result +{ + "aggregations": { + "genres": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "electronic", + "doc_count": 4 + }, + { + "key": "rock", + "doc_count": 3 + }, + { + "key": "electronic anthology", + "doc_count": 2 + }, + { + "key": "jazz", + "doc_count": 2 + } + ] + } + }, + ... +} +``` + +This is a little slower because the runtime field has to access two fields instead of one and because there are some optimizations that work on non-runtime `keyword` fields that we have to give up for for runtime `keyword` fields. If you need the speed, you can index the `normalized_genre` field. + + +## Filtering Values [_filtering_values_4] + +It is possible to filter the values for which buckets will be created. This can be done using the `include` and `exclude` parameters which are based on regular expression strings or arrays of exact values. Additionally, `include` clauses can filter using `partition` expressions. + +### Filtering Values with regular expressions [_filtering_values_with_regular_expressions_2] + +$$$terms-aggregation-regex-example$$$ + +```console +GET /_search +{ + "aggs": { + "tags": { + "terms": { + "field": "tags", + "include": ".*sport.*", + "exclude": "water_.*" + } + } + } +} +``` + +In the above example, buckets will be created for all the tags that has the word `sport` in them, except those starting with `water_` (so the tag `water_sports` will not be aggregated). The `include` regular expression will determine what values are "allowed" to be aggregated, while the `exclude` determines the values that should not be aggregated. When both are defined, the `exclude` has precedence, meaning, the `include` is evaluated first and only then the `exclude`. + +The syntax is the same as [regexp queries](/reference/query-languages/regexp-syntax.md). + + +### Filtering Values with exact values [_filtering_values_with_exact_values_2] + +For matching based on exact values the `include` and `exclude` parameters can simply take an array of strings that represent the terms as they are found in the index: + +$$$terms-aggregation-exact-example$$$ + +```console +GET /_search +{ + "aggs": { + "JapaneseCars": { + "terms": { + "field": "make", + "include": [ "mazda", "honda" ] + } + }, + "ActiveCarManufacturers": { + "terms": { + "field": "make", + "exclude": [ "rover", "jensen" ] + } + } + } +} +``` + + +### Filtering Values with partitions [_filtering_values_with_partitions] + +Sometimes there are too many unique terms to process in a single request/response pair so it can be useful to break the analysis up into multiple requests. This can be achieved by grouping the field’s values into a number of partitions at query-time and processing only one partition in each request. 
Consider this request which is looking for accounts that have not logged any access recently: + +$$$terms-aggregation-partitions-example$$$ + +```console +GET /_search +{ + "size": 0, + "aggs": { + "expired_sessions": { + "terms": { + "field": "account_id", + "include": { + "partition": 0, + "num_partitions": 20 + }, + "size": 10000, + "order": { + "last_access": "asc" + } + }, + "aggs": { + "last_access": { + "max": { + "field": "access_date" + } + } + } + } + } +} +``` + +This request is finding the last logged access date for a subset of customer accounts because we might want to expire some customer accounts who haven’t been seen for a long while. The `num_partitions` setting has requested that the unique account_ids are organized evenly into twenty partitions (0 to 19). and the `partition` setting in this request filters to only consider account_ids falling into partition 0. Subsequent requests should ask for partitions 1 then 2 etc to complete the expired-account analysis. + +Note that the `size` setting for the number of results returned needs to be tuned with the `num_partitions`. For this particular account-expiration example the process for balancing values for `size` and `num_partitions` would be as follows: + +1. Use the `cardinality` aggregation to estimate the total number of unique account_id values +2. Pick a value for `num_partitions` to break the number from 1) up into more manageable chunks +3. Pick a `size` value for the number of responses we want from each partition +4. Run a test request + +If we have a circuit-breaker error we are trying to do too much in one request and must increase `num_partitions`. If the request was successful but the last account ID in the date-sorted test response was still an account we might want to expire then we may be missing accounts of interest and have set our numbers too low. We must either + +* increase the `size` parameter to return more results per partition (could be heavy on memory) or +* increase the `num_partitions` to consider less accounts per request (could increase overall processing time as we need to make more requests) + +Ultimately this is a balancing act between managing the Elasticsearch resources required to process a single request and the volume of requests that the client application must issue to complete a task. + +::::{warning} +Partitions cannot be used together with an `exclude` parameter. +:::: + + + + +## Multi-field terms aggregation [_multi_field_terms_aggregation] + +The `terms` aggregation does not support collecting terms from multiple fields in the same document. The reason is that the `terms` agg doesn’t collect the string term values themselves, but rather uses [global ordinals](#search-aggregations-bucket-terms-aggregation-execution-hint) to produce a list of all of the unique values in the field. Global ordinals results in an important performance boost which would not be possible across multiple fields. + +There are three approaches that you can use to perform a `terms` agg across multiple fields: + +[Script](#search-aggregations-bucket-terms-aggregation-script) +: Use a script to retrieve terms from multiple fields. This disables the global ordinals optimization and will be slower than collecting terms from a single field, but it gives you the flexibility to implement this option at search time. 
+ +[`copy_to` field](/reference/elasticsearch/mapping-reference/copy-to.md) +: If you know ahead of time that you want to collect the terms from two or more fields, then use `copy_to` in your mapping to create a new dedicated field at index time which contains the values from both fields. You can aggregate on this single field, which will benefit from the global ordinals optimization. + +[`multi_terms` aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-multi-terms-aggregation.md) +: Use multi_terms aggregation to combine terms from multiple fields into a compound key. This also disables the global ordinals and will be slower than collecting terms from a single field. It is faster but less flexible than using a script. + + +## Collect mode [search-aggregations-bucket-terms-aggregation-collect] + +Deferring calculation of child aggregations + +For fields with many unique terms and a small number of required results it can be more efficient to delay the calculation of child aggregations until the top parent-level aggs have been pruned. Ordinarily, all branches of the aggregation tree are expanded in one depth-first pass and only then any pruning occurs. In some scenarios this can be very wasteful and can hit memory constraints. An example problem scenario is querying a movie database for the 10 most popular actors and their 5 most common co-stars: + +$$$terms-aggregation-collect-mode-example$$$ + +```console +GET /_search +{ + "aggs": { + "actors": { + "terms": { + "field": "actors", + "size": 10 + }, + "aggs": { + "costars": { + "terms": { + "field": "actors", + "size": 5 + } + } + } + } + } +} +``` + +Even though the number of actors may be comparatively small and we want only 50 result buckets there is a combinatorial explosion of buckets during calculation - a single actor can produce n² buckets where n is the number of actors. The sane option would be to first determine the 10 most popular actors and only then examine the top co-stars for these 10 actors. This alternative strategy is what we call the `breadth_first` collection mode as opposed to the `depth_first` mode. + +::::{note} +The `breadth_first` is the default mode for fields with a cardinality bigger than the requested size or when the cardinality is unknown (numeric fields or scripts for instance). It is possible to override the default heuristic and to provide a collect mode directly in the request: +:::: + + +$$$terms-aggregation-breadth-first-example$$$ + +```console +GET /_search +{ + "aggs": { + "actors": { + "terms": { + "field": "actors", + "size": 10, + "collect_mode": "breadth_first" <1> + }, + "aggs": { + "costars": { + "terms": { + "field": "actors", + "size": 5 + } + } + } + } + } +} +``` + +1. the possible values are `breadth_first` and `depth_first` + + +When using `breadth_first` mode the set of documents that fall into the uppermost buckets are cached for subsequent replay so there is a memory overhead in doing this which is linear with the number of matching documents. Note that the `order` parameter can still be used to refer to data from a child aggregation when using the `breadth_first` setting - the parent aggregation understands that this child aggregation will need to be called first before any of the other child aggregations. 
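A minimal sketch of that combination (assuming a hypothetical numeric `rating` field on the movie documents) orders the actor buckets by a child `max` aggregation while still collecting the co-star buckets breadth first:

```console
GET /_search
{
  "aggs": {
    "actors": {
      "terms": {
        "field": "actors",
        "size": 10,
        "collect_mode": "breadth_first",
        "order": { "max_rating": "desc" }
      },
      "aggs": {
        "max_rating": { "max": { "field": "rating" } },
        "costars": {
          "terms": { "field": "actors", "size": 5 }
        }
      }
    }
  }
}
```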
+ +::::{warning} +Nested aggregations such as `top_hits` which require access to score information under an aggregation that uses the `breadth_first` collection mode need to replay the query on the second pass but only for the documents belonging to the top buckets. +:::: + + + +## Execution hint [search-aggregations-bucket-terms-aggregation-execution-hint] + +There are different mechanisms by which terms aggregations can be executed: + +* by using field values directly in order to aggregate data per-bucket (`map`) +* by using global ordinals of the field and allocating one bucket per global ordinal (`global_ordinals`) + +Elasticsearch tries to have sensible defaults so this is something that generally doesn’t need to be configured. + +`global_ordinals` is the default option for `keyword` field, it uses global ordinals to allocates buckets dynamically so memory usage is linear to the number of values of the documents that are part of the aggregation scope. + +`map` should only be considered when very few documents match a query. Otherwise the ordinals-based execution mode is significantly faster. By default, `map` is only used when running an aggregation on scripts, since they don’t have ordinals. + +$$$terms-aggregation-execution-hint-example$$$ + +```console +GET /_search +{ + "aggs": { + "tags": { + "terms": { + "field": "tags", + "execution_hint": "map" <1> + } + } + } +} +``` + +1. The possible values are `map`, `global_ordinals` + + +Please note that Elasticsearch will ignore this execution hint if it is not applicable and that there is no backward compatibility guarantee on these hints. + + +## Missing value [_missing_value_5] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +$$$terms-aggregation-missing-example$$$ + +```console +GET /_search +{ + "aggs": { + "tags": { + "terms": { + "field": "tags", + "missing": "N/A" <1> + } + } + } +} +``` + +1. Documents without a value in the `tags` field will fall into the same bucket as documents that have the value `N/A`. + + + +## Mixing field types [_mixing_field_types_2] + +::::{warning} +When aggregating on multiple indices the type of the aggregated field may not be the same in all indices. Some types are compatible with each other (`integer` and `long` or `float` and `double`) but when the types are a mix of decimal and non-decimal number the terms aggregation will promote the non-decimal numbers to decimal numbers. This can result in a loss of precision in the bucket values. +:::: + + + +### Troubleshooting [search-aggregations-bucket-terms-aggregation-troubleshooting] + +### Failed Trying to Format Bytes [_failed_trying_to_format_bytes] + +When running a terms aggregation (or other aggregation, but in practice usually terms) over multiple indices, you may get an error that starts with "Failed trying to format bytes…​". This is usually caused by two of the indices not having the same mapping type for the field being aggregated. + +**Use an explicit `value_type`** Although it’s best to correct the mappings, you can work around this issue if the field is unmapped in one of the indices. Setting the `value_type` parameter can resolve the issue by coercing the unmapped field into the correct type. 
$$$terms-aggregation-value_type-example$$$

```console
GET /_search
{
  "aggs": {
    "ip_addresses": {
      "terms": {
        "field": "destination_ip",
        "missing": "0.0.0.0",
        "value_type": "ip"
      }
    }
  }
}
```
diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-time-series-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-time-series-aggregation.md
new file mode 100644
index 0000000000000..24bce3368a67f
--- /dev/null
+++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-time-series-aggregation.md
@@ -0,0 +1,70 @@
---
navigation_title: "Time series"
mapped_pages:
  - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-time-series-aggregation.html
---

# Time series aggregation [search-aggregations-bucket-time-series-aggregation]


::::{warning}
This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features.
::::


The time series aggregation queries data created using a [Time series data stream (TSDS)](docs-content://manage-data/data-store/data-streams/time-series-data-stream-tsds.md). This is typically data such as metrics or other data streams with a time component, and requires creating an index using the time series mode.

::::{note}
Refer to the [TSDS documentation](docs-content://manage-data/data-store/data-streams/time-series-data-stream-tsds.md#differences-from-regular-data-stream) to learn more about the key differences from regular data streams.

::::


Data can be added to the time series index like other indices:

```js
PUT /my-time-series-index-0/_bulk
{ "index": {} }
{ "key": "a", "val": 1, "@timestamp": "2022-01-01T00:00:10Z" }
{ "index": {}}
{ "key": "a", "val": 2, "@timestamp": "2022-01-02T00:00:00Z" }
{ "index": {} }
{ "key": "b", "val": 2, "@timestamp": "2022-01-01T00:00:10Z" }
{ "index": {}}
{ "key": "b", "val": 3, "@timestamp": "2022-01-02T00:00:00Z" }
```

To perform a time series aggregation, specify "time_series" as the aggregation type. When the boolean "keyed" is true, each bucket is given a unique key.

$$$time-series-aggregation-example$$$

```js
GET /_search
{
  "aggs": {
    "ts": {
      "time_series": { "keyed": false }
    }
  }
}
```

This will return all results in the time series. However, a more typical query will use sub aggregations to reduce the data returned to something more relevant.

## Size [search-aggregations-bucket-time-series-aggregation-size]

By default, `time series` aggregations return 10000 results. The "size" parameter can be used to limit the results further. Alternatively, using sub aggregations can limit the number of values returned by the aggregation.


## Keyed [search-aggregations-bucket-time-series-aggregation-keyed]

The `keyed` parameter determines if buckets are returned as a map with unique keys per bucket. By default with `keyed` set to false, buckets are returned as an array.


## Limitations [times-series-aggregations-limitations]

The `time_series` aggregation has many limitations. Many aggregation performance optimizations are disabled when using the `time_series` aggregation. For example, the filter-by-filter optimization and the `breadth_first` collect mode are disabled (the `terms` and `multi_terms` aggregations are forced to use the `depth_first` collect mode).
+ +The following aggregations also fail to work if used in combination with the `time_series` aggregation: `auto_date_histogram`, `variable_width_histogram`, `rare_terms`, `global`, `composite`, `sampler`, `random_sampler` and `diversified_sampler`. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-bucket-variablewidthhistogram-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-variablewidthhistogram-aggregation.md new file mode 100644 index 0000000000000..ba44b9fff573a --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-bucket-variablewidthhistogram-aggregation.md @@ -0,0 +1,100 @@ +--- +navigation_title: "Variable width histogram" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-variablewidthhistogram-aggregation.html +--- + +# Variable width histogram aggregation [search-aggregations-bucket-variablewidthhistogram-aggregation] + + +This is a multi-bucket aggregation similar to [Histogram](/reference/data-analysis/aggregations/search-aggregations-bucket-histogram-aggregation.md). However, the width of each bucket is not specified. Rather, a target number of buckets is provided and bucket intervals are dynamically determined based on the document distribution. This is done using a simple one-pass document clustering algorithm that aims to obtain low distances between bucket centroids. Unlike other multi-bucket aggregations, the intervals will not necessarily have a uniform width. + +::::{tip} +The number of buckets returned will always be less than or equal to the target number. +:::: + + +Requesting a target of 2 buckets. + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "prices": { + "variable_width_histogram": { + "field": "price", + "buckets": 2 + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "prices": { + "buckets": [ + { + "min": 10.0, + "key": 30.0, + "max": 50.0, + "doc_count": 2 + }, + { + "min": 150.0, + "key": 185.0, + "max": 200.0, + "doc_count": 5 + } + ] + } + } +} +``` + +::::{important} +This aggregation cannot currently be nested under any aggregation that collects from more than a single bucket. +:::: + + +## Clustering Algorithm [_clustering_algorithm] + +Each shard fetches the first `initial_buffer` documents and stores them in memory. Once the buffer is full, these documents are sorted and linearly separated into `3/4 * shard_size buckets`. Next each remaining documents is either collected into the nearest bucket, or placed into a new bucket if it is distant from all the existing ones. At most `shard_size` total buckets are created. + +In the reduce step, the coordinating node sorts the buckets from all shards by their centroids. Then, the two buckets with the nearest centroids are repeatedly merged until the target number of buckets is achieved. This merging procedure is a form of [agglomerative hierarchical clustering](https://en.wikipedia.org/wiki/Hierarchical_clustering). + +::::{tip} +A shard can return fewer than `shard_size` buckets, but it cannot return more. +:::: + + + +## Shard size [_shard_size_3] + +The `shard_size` parameter specifies the number of buckets that the coordinating node will request from each shard. A higher `shard_size` leads each shard to produce smaller buckets. This reduces the likelihood of buckets overlapping after the reduction step. 
Increasing the `shard_size` will improve the accuracy of the histogram, but it will also make it more expensive to compute the final result because bigger priority queues will have to be managed on a shard level, and the data transfers between the nodes and the client will be larger. + +::::{tip} +Parameters `buckets`, `shard_size`, and `initial_buffer` are optional. By default, `buckets = 10`, `shard_size = buckets * 50`, and `initial_buffer = min(10 * shard_size, 50000)`. +:::: + + + +## Initial Buffer [_initial_buffer] + +The `initial_buffer` parameter can be used to specify the number of individual documents that will be stored in memory on a shard before the initial bucketing algorithm is run. Bucket distribution is determined using this sample of `initial_buffer` documents. So, although a higher `initial_buffer` will use more memory, it will lead to more representative clusters. + + +## Bucket bounds are approximate [_bucket_bounds_are_approximate] + +During the reduce step, the master node continuously merges the two buckets with the nearest centroids. If two buckets have overlapping bounds but distant centroids, then it is possible that they will not be merged. Because of this, after reduction the maximum value in some interval (`max`) might be greater than the minimum value in the subsequent bucket (`min`). To reduce the impact of this error, when such an overlap occurs the bound between these intervals is adjusted to be `(max + min) / 2`. + +::::{tip} +Bucket bounds are very sensitive to outliers +:::: + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-change-point-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-change-point-aggregation.md new file mode 100644 index 0000000000000..56960c32b5565 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-change-point-aggregation.md @@ -0,0 +1,136 @@ +--- +navigation_title: "Change point" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-change-point-aggregation.html +--- + +# Change point aggregation [search-aggregations-change-point-aggregation] + + +::::{warning} +This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features. +:::: + + +A sibling pipeline that detects, spikes, dips, and change points in a metric. Given a distribution of values provided by the sibling multi-bucket aggregation, this aggregation indicates the bucket of any spike or dip and/or the bucket at which the largest change in the distribution of values, if they are statistically significant. + +::::{tip} +It is recommended to use the change point aggregation to detect changes in time-based data, however, you can use any metric to create buckets. +:::: + + +## Parameters [change-point-agg-syntax] + +`buckets_path` +: (Required, string) Path to the buckets that contain one set of values in which to detect a change point. There must be at least 22 bucketed values. Fewer than 1,000 is preferred. For syntax, see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax). + + +## Syntax [_syntax_11] + +A `change_point` aggregation looks like this in isolation: + +```js +{ + "change_point": { + "buckets_path": "date_histogram>_count" <1> + } +} +``` + +1. The buckets containing the values to test against. 
+ + + +## Response body [change-point-agg-response] + +`bucket` +: (Optional, object) Values of the bucket that indicates the discovered change point. Not returned if no change point was found. All the aggregations in the bucket are returned as well. + + ::::{dropdown} Properties of bucket + `key` + : (value) The key of the bucket matched. Could be string or numeric. + + `doc_count` + : (number) The document count of the bucket. + + :::: + + +`type` +: (object) The found change point type and its related values. Possible types: + + * `dip`: a significant dip occurs at this change point + * `distribution_change`: the overall distribution of the values has changed significantly + * `non_stationary`: there is no change point, but the values are not from a stationary distribution + * `spike`: a significant spike occurs at this point + * `stationary`: no change point found + * `step_change`: the change indicates a statistically significant step up or down in value distribution + * `trend_change`: there is an overall trend change occurring at this point + + + +## Example [_example_7] + +The following example uses the Kibana sample data logs data set. + +```js +GET kibana_sample_data_logs/_search +{ + "aggs": { + "date":{ <1> + "date_histogram": { + "field": "@timestamp", + "fixed_interval": "1d" + }, + "aggs": { + "avg": { <2> + "avg": { + "field": "bytes" + } + } + } + }, + "change_points_avg": { <3> + "change_point": { + "buckets_path": "date>avg" <4> + } + } + } +} +``` + +1. A date histogram aggregation that creates buckets with one day long interval. +2. A sibling aggregation of the `date` aggregation that calculates the average value of the `bytes` field within every bucket. +3. The change point detection aggregation configuration object. +4. The path of the aggregation values to detect change points. In this case, the input of the change point aggregation is the value of `avg` which is a sibling aggregation of `date`. + + +The request returns a response that is similar to the following: + +```js + "change_points_avg" : { + "bucket" : { + "key" : "2023-04-29T00:00:00.000Z", <1> + "doc_count" : 329, <2> + "avg" : { <3> + "value" : 4737.209726443769 + } + }, + "type" : { <4> + "dip" : { + "p_value" : 3.8999455212466465e-10, <5> + "change_point" : 41 <6> + } + } + } +``` + +1. The bucket key that is the change point. +2. The number of documents in that bucket. +3. Aggregated values in the bucket. +4. Type of change found. +5. The `p_value` indicates how extreme the change is; lower values indicate greater change. +6. The specific bucket where the change occurs (indexing starts at `0`). + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-matrix-stats-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-matrix-stats-aggregation.md new file mode 100644 index 0000000000000..a3939ae956d4f --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-matrix-stats-aggregation.md @@ -0,0 +1,144 @@ +--- +navigation_title: "Matrix stats" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-matrix-stats-aggregation.html +--- + +# Matrix stats aggregation [search-aggregations-matrix-stats-aggregation] + + +The `matrix_stats` aggregation is a numeric aggregation that computes the following statistics over a set of document fields: + +`count` +: Number of per field samples included in the calculation. + +`mean` +: The average value for each field. 
+ +`variance` +: Per field Measurement for how spread out the samples are from the mean. + +`skewness` +: Per field measurement quantifying the asymmetric distribution around the mean. + +`kurtosis` +: Per field measurement quantifying the shape of the distribution. + +`covariance` +: A matrix that quantitatively describes how changes in one field are associated with another. + +`correlation` +: The covariance matrix scaled to a range of -1 to 1, inclusive. Describes the relationship between field distributions. + +::::{important} +Unlike other metric aggregations, the `matrix_stats` aggregation does not support scripting. +:::: + + +The following example demonstrates the use of matrix stats to describe the relationship between income and poverty. + +$$$stats-aggregation-example$$$ + +```console +GET /_search +{ + "aggs": { + "statistics": { + "matrix_stats": { + "fields": [ "poverty", "income" ] + } + } + } +} +``` + +The aggregation type is `matrix_stats` and the `fields` setting defines the set of fields (as an array) for computing the statistics. The above request returns the following response: + +```console-result +{ + ... + "aggregations": { + "statistics": { + "doc_count": 50, + "fields": [ { + "name": "income", + "count": 50, + "mean": 51985.1, + "variance": 7.383377037755103E7, + "skewness": 0.5595114003506483, + "kurtosis": 2.5692365287787124, + "covariance": { + "income": 7.383377037755103E7, + "poverty": -21093.65836734694 + }, + "correlation": { + "income": 1.0, + "poverty": -0.8352655256272504 + } + }, { + "name": "poverty", + "count": 50, + "mean": 12.732000000000001, + "variance": 8.637730612244896, + "skewness": 0.4516049811903419, + "kurtosis": 2.8615929677997767, + "covariance": { + "income": -21093.65836734694, + "poverty": 8.637730612244896 + }, + "correlation": { + "income": -0.8352655256272504, + "poverty": 1.0 + } + } ] + } + } +} +``` + +The `doc_count` field indicates the number of documents involved in the computation of the statistics. + +## Multi Value Fields [_multi_value_fields] + +The `matrix_stats` aggregation treats each document field as an independent sample. The `mode` parameter controls what array value the aggregation will use for array or multi-valued fields. This parameter can take one of the following: + +`avg` +: (default) Use the average of all values. + +`min` +: Pick the lowest value. + +`max` +: Pick the highest value. + +`sum` +: Use the sum of all values. + +`median` +: Use the median of all values. + + +## Missing Values [_missing_values_3] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. This is done by adding a set of fieldname : value mappings to specify default values per field. + +$$$stats-aggregation-missing-example$$$ + +```console +GET /_search +{ + "aggs": { + "matrixstats": { + "matrix_stats": { + "fields": [ "poverty", "income" ], + "missing": { "income": 50000 } <1> + } + } + } +} +``` + +1. Documents without a value in the `income` field will have the default value `50000`. 
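+
+The `mode` and `missing` settings described above can be combined in a single request. The following is an illustrative sketch, not a tested snippet, that reduces multi-valued `income` entries to their median while still defaulting missing values to `50000`:
+
+```js
+GET /_search
+{
+  "aggs": {
+    "matrixstats": {
+      "matrix_stats": {
+        "fields": [ "poverty", "income" ],
+        "mode": "median",              <1>
+        "missing": { "income": 50000 } <2>
+      }
+    }
+  }
+}
+```
+
+1. For documents where a field is array-valued, use the median of the values.
+2. Documents without a value in the `income` field are treated as if they had the value `50000`.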
+ + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-avg-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-avg-aggregation.md new file mode 100644 index 0000000000000..2c0c0a0a58dbb --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-avg-aggregation.md @@ -0,0 +1,137 @@ +--- +navigation_title: "Avg" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-avg-aggregation.html +--- + +# Avg aggregation [search-aggregations-metrics-avg-aggregation] + + +A `single-value` metrics aggregation that computes the average of numeric values that are extracted from the aggregated documents. These values can be extracted either from specific numeric or [histogram](/reference/elasticsearch/mapping-reference/histogram.md) fields in the documents. + +Assuming the data consists of documents representing exams grades (between 0 and 100) of students we can average their scores with: + +```console +POST /exams/_search?size=0 +{ + "aggs": { + "avg_grade": { "avg": { "field": "grade" } } + } +} +``` + +The above aggregation computes the average grade over all documents. The aggregation type is `avg` and the `field` setting defines the numeric field of the documents the average will be computed on. The above will return the following: + +```console-result +{ + ... + "aggregations": { + "avg_grade": { + "value": 75.0 + } + } +} +``` + +The name of the aggregation (`avg_grade` above) also serves as the key by which the aggregation result can be retrieved from the returned response. + +## Script [_script_2] + +Let’s say the exam was exceedingly difficult, and you need to apply a grade correction. Average a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md) to get a corrected average: + +```console +POST /exams/_search?size=0 +{ + "runtime_mappings": { + "grade.corrected": { + "type": "double", + "script": { + "source": "emit(Math.min(100, doc['grade'].value * params.correction))", + "params": { + "correction": 1.2 + } + } + } + }, + "aggs": { + "avg_corrected_grade": { + "avg": { + "field": "grade.corrected" + } + } + } +} +``` + + +## Missing value [_missing_value_6] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +```console +POST /exams/_search?size=0 +{ + "aggs": { + "grade_avg": { + "avg": { + "field": "grade", + "missing": 10 <1> + } + } + } +} +``` + +1. Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`. + + + +## Histogram fields [search-aggregations-metrics-avg-aggregation-histogram-fields] + +When average is computed on [histogram fields](/reference/elasticsearch/mapping-reference/histogram.md), the result of the aggregation is the weighted average of all elements in the `values` array taking into consideration the number in the same position in the `counts` array. 
+ +For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks: + +```console +PUT metrics_index/_doc/1 +{ + "network.name" : "net-1", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], + "counts" : [3, 7, 23, 12, 6] + } +} + +PUT metrics_index/_doc/2 +{ + "network.name" : "net-2", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], + "counts" : [8, 17, 8, 7, 6] + } +} + +POST /metrics_index/_search?size=0 +{ + "aggs": { + "avg_latency": + { "avg": { "field": "latency_histo" } + } + } +} +``` + +For each histogram field the `avg` aggregation adds each number in the `values` array multiplied by its associated count in the `counts` array. Eventually, it will compute the average over those values for all histograms and return the following result: + +```console-result +{ + ... + "aggregations": { + "avg_latency": { + "value": 0.29690721649 + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-boxplot-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-boxplot-aggregation.md new file mode 100644 index 0000000000000..d8997339a9541 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-boxplot-aggregation.md @@ -0,0 +1,183 @@ +--- +navigation_title: "Boxplot" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-boxplot-aggregation.html +--- + +# Boxplot aggregation [search-aggregations-metrics-boxplot-aggregation] + + +A `boxplot` metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents. These values can be generated from specific numeric or [histogram fields](/reference/elasticsearch/mapping-reference/histogram.md) in the documents. + +The `boxplot` aggregation returns essential information for making a [box plot](https://en.wikipedia.org/wiki/Box_plot): minimum, maximum, median, first quartile (25th percentile) and third quartile (75th percentile) values. + +## Syntax [_syntax_4] + +A `boxplot` aggregation looks like this in isolation: + +```js +{ + "boxplot": { + "field": "load_time" + } +} +``` + +Let’s look at a boxplot representing load time: + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_boxplot": { + "boxplot": { + "field": "load_time" <1> + } + } + } +} +``` + +1. The field `load_time` must be a numeric field + + +The response will look like this: + +```console-result +{ + ... + + "aggregations": { + "load_time_boxplot": { + "min": 0.0, + "max": 990.0, + "q1": 167.5, + "q2": 445.0, + "q3": 722.5, + "lower": 0.0, + "upper": 990.0 + } + } +} +``` + +In this case, the lower and upper whisker values are equal to the min and max. In general, these values are the 1.5 * IQR range, which is to say the nearest values to `q1 - (1.5 * IQR)` and `q3 + (1.5 * IQR)`. Since this is an approximation, the given values may not actually be observed values from the data, but should be within a reasonable error bound of them. While the Boxplot aggregation doesn’t directly return outlier points, you can check if `lower > min` or `upper < max` to see if outliers exist on either side, and then query for them directly. + + +## Script [_script_3] + +If you need to create a boxplot for values that aren’t indexed exactly you should create a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md) and get the boxplot of that. 
For example, if your load times are in milliseconds but you want values calculated in seconds, use a runtime field to convert them: + +```console +GET latency/_search +{ + "size": 0, + "runtime_mappings": { + "load_time.seconds": { + "type": "long", + "script": { + "source": "emit(doc['load_time'].value / params.timeUnit)", + "params": { + "timeUnit": 1000 + } + } + } + }, + "aggs": { + "load_time_boxplot": { + "boxplot": { "field": "load_time.seconds" } + } + } +} +``` + + +## Boxplot values are (usually) approximate [search-aggregations-metrics-boxplot-aggregation-approximation] + +The algorithm used by the `boxplot` metric is called TDigest (introduced by Ted Dunning in [Computing Accurate Quantiles using T-Digests](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf)). + +::::{warning} +Boxplot as other percentile aggregations are also [non-deterministic](https://en.wikipedia.org/wiki/Nondeterministic_algorithm). This means you can get slightly different results using the same data. + +:::: + + + +## Compression [search-aggregations-metrics-boxplot-aggregation-compression] + +Approximate algorithms must balance memory utilization with estimation accuracy. This balance can be controlled using a `compression` parameter: + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_boxplot": { + "boxplot": { + "field": "load_time", + "compression": 200 <1> + } + } + } +} +``` + +1. Compression controls memory usage and approximation error + + +The TDigest algorithm uses a number of "nodes" to approximate percentiles — the more nodes available, the higher the accuracy (and large memory footprint) proportional to the volume of data. The `compression` parameter limits the maximum number of nodes to `20 * compression`. + +Therefore, by increasing the compression value, you can increase the accuracy of your percentiles at the cost of more memory. Larger compression values also make the algorithm slower since the underlying tree data structure grows in size, resulting in more expensive operations. The default compression value is `100`. + +A "node" uses roughly 32 bytes of memory, so under worst-case scenarios (large amount of data which arrives sorted and in-order) the default settings will produce a TDigest roughly 64KB in size. In practice data tends to be more random and the TDigest will use less memory. + + +## Execution hint [_execution_hint_3] + +The default implementation of TDigest is optimized for performance, scaling to millions or even billions of sample values while maintaining acceptable accuracy levels (close to 1% relative error for millions of samples in some cases). There’s an option to use an implementation optimized for accuracy by setting parameter `execution_hint` to value `high_accuracy`: + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_boxplot": { + "boxplot": { + "field": "load_time", + "execution_hint": "high_accuracy" <1> + } + } + } +} +``` + +1. Optimize TDigest for accuracy, at the expense of performance + + +This option can lead to improved accuracy (relative error close to 0.01% for millions of samples in some cases) but then percentile queries take 2x-10x longer to complete. + + +## Missing value [_missing_value_7] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. 
+ +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "grade_boxplot": { + "boxplot": { + "field": "grade", + "missing": 10 <1> + } + } + } +} +``` + +1. Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-cardinality-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-cardinality-aggregation.md new file mode 100644 index 0000000000000..0c7711824a357 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-cardinality-aggregation.md @@ -0,0 +1,154 @@ +--- +navigation_title: "Cardinality" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html +--- + +# Cardinality aggregation [search-aggregations-metrics-cardinality-aggregation] + + +A `single-value` metrics aggregation that calculates an approximate count of distinct values. + +Assume you are indexing store sales and would like to count the unique number of sold products that match a query: + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "type_count": { + "cardinality": { + "field": "type" + } + } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "type_count": { + "value": 3 + } + } +} +``` + +## Precision control [_precision_control] + +This aggregation also supports the `precision_threshold` option: + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "type_count": { + "cardinality": { + "field": "type", + "precision_threshold": 100 <1> + } + } + } +} +``` + +1. The `precision_threshold` options allows to trade memory for accuracy, and defines a unique count below which counts are expected to be close to accurate. Above this value, counts might become a bit more fuzzy. The maximum supported value is 40000, thresholds above this number will have the same effect as a threshold of 40000. The default value is `3000`. + + + +## Counts are approximate [_counts_are_approximate] + +Computing exact counts requires loading values into a hash set and returning its size. This doesn’t scale when working on high-cardinality sets and/or large values as the required memory usage and the need to communicate those per-shard sets between nodes would utilize too many resources of the cluster. + +This `cardinality` aggregation is based on the [HyperLogLog++](https://static.googleusercontent.com/media/research.google.com/fr//pubs/archive/40671.pdf) algorithm, which counts based on the hashes of the values with some interesting properties: + +* configurable precision, which decides on how to trade memory for accuracy, +* excellent accuracy on low-cardinality sets, +* fixed memory usage: no matter if there are tens or billions of unique values, memory usage only depends on the configured precision. + +For a precision threshold of `c`, the implementation that we are using requires about `c * 8` bytes. + +The following chart shows how the error varies before and after the threshold: + +![cardinality error](../../../images/cardinality_error.png "") + +For all 3 thresholds, counts have been accurate up to the configured threshold. Although not guaranteed, this is likely to be the case. Accuracy in practice depends on the dataset in question. In general, most datasets show consistently good accuracy. 
Also note that even with a threshold as low as 100, the error remains very low (1-6% as seen in the above graph) even when counting millions of items. + +The HyperLogLog++ algorithm depends on the leading zeros of hashed values, the exact distributions of hashes in a dataset can affect the accuracy of the cardinality. + + +## Pre-computed hashes [_pre_computed_hashes] + +On string fields that have a high cardinality, it might be faster to store the hash of your field values in your index and then run the cardinality aggregation on this field. This can either be done by providing hash values from client-side or by letting Elasticsearch compute hash values for you by using the [`mapper-murmur3`](/reference/elasticsearch-plugins/mapper-murmur3.md) plugin. + +::::{note} +Pre-computing hashes is usually only useful on very large and/or high-cardinality fields as it saves CPU and memory. However, on numeric fields, hashing is very fast and storing the original values requires as much or less memory than storing the hashes. This is also true on low-cardinality string fields, especially given that those have an optimization in order to make sure that hashes are computed at most once per unique value per segment. +:::: + + + +## Script [_script_4] + +If you need the cardinality of the combination of two fields, create a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md) combining them and aggregate it. + +```console +POST /sales/_search?size=0 +{ + "runtime_mappings": { + "type_and_promoted": { + "type": "keyword", + "script": "emit(doc['type'].value + ' ' + doc['promoted'].value)" + } + }, + "aggs": { + "type_promoted_count": { + "cardinality": { + "field": "type_and_promoted" + } + } + } +} +``` + + +## Missing value [_missing_value_8] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "tag_cardinality": { + "cardinality": { + "field": "tag", + "missing": "N/A" <1> + } + } + } +} +``` + +1. Documents without a value in the `tag` field will fall into the same bucket as documents that have the value `N/A`. + + + +## Execution hint [_execution_hint_4] + +You can run cardinality aggregations using different mechanisms: + +* by using field values directly (`direct`) +* by using global ordinals of the field and resolving those values after finishing a shard (`global_ordinals`) +* by using segment ordinal values and resolving those values after each segment (`segment_ordinals`) + +Additionally, there are two "heuristic based" modes. These modes will cause {{es}} to use some data about the state of the index to choose an appropriate execution method. The two heuristics are: + +* `save_time_heuristic` - this is the default in {{es}} 8.4 and later. +* `save_memory_heuristic` - this was the default in {{es}} 8.3 and earlier + +When not specified, {{es}} will apply a heuristic to choose the appropriate mode. Also note that for some data (non-ordinal fields), `direct` is the only option, and the hint will be ignored in these cases. Generally speaking, it should not be necessary to set this value. 
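+
+If you do need to override the heuristic, the hint is passed directly in the aggregation body. The following is an illustrative sketch, not a tested snippet, that favors memory usage over speed:
+
+```js
+POST /sales/_search?size=0
+{
+  "aggs": {
+    "type_count": {
+      "cardinality": {
+        "field": "type",
+        "execution_hint": "save_memory_heuristic" <1>
+      }
+    }
+  }
+}
+```
+
+1. Any of the values listed above can be supplied here; as noted, the hint is ignored when `direct` is the only option for the field.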
+ + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-cartesian-bounds-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-cartesian-bounds-aggregation.md new file mode 100644 index 0000000000000..832014b936d56 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-cartesian-bounds-aggregation.md @@ -0,0 +1,143 @@ +--- +navigation_title: "Cartesian-bounds" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cartesian-bounds-aggregation.html +--- + +# Cartesian-bounds aggregation [search-aggregations-metrics-cartesian-bounds-aggregation] + + +A metric aggregation that computes the spatial bounding box containing all values for a [Point](/reference/elasticsearch/mapping-reference/point.md) or [Shape](/reference/elasticsearch/mapping-reference/shape.md) field. + +Example: + +```console +PUT /museums +{ + "mappings": { + "properties": { + "location": { + "type": "point" + } + } + } +} + +POST /museums/_bulk?refresh +{"index":{"_id":1}} +{"location": "POINT (491.2350 5237.4081)", "city": "Amsterdam", "name": "NEMO Science Museum"} +{"index":{"_id":2}} +{"location": "POINT (490.1618 5236.9219)", "city": "Amsterdam", "name": "Museum Het Rembrandthuis"} +{"index":{"_id":3}} +{"location": "POINT (491.4722 5237.1667)", "city": "Amsterdam", "name": "Nederlands Scheepvaartmuseum"} +{"index":{"_id":4}} +{"location": "POINT (440.5200 5122.2900)", "city": "Antwerp", "name": "Letterenhuis"} +{"index":{"_id":5}} +{"location": "POINT (233.6389 4886.1111)", "city": "Paris", "name": "Musée du Louvre"} +{"index":{"_id":6}} +{"location": "POINT (232.7000 4886.0000)", "city": "Paris", "name": "Musée d'Orsay"} + +POST /museums/_search?size=0 +{ + "query": { + "match": { "name": "musée" } + }, + "aggs": { + "viewport": { + "cartesian_bounds": { + "field": "location" <1> + } + } + } +} +``` + +1. The `cartesian_bounds` aggregation specifies the field to use to obtain the bounds, which must be a [Point](/reference/elasticsearch/mapping-reference/point.md) or a [Shape](/reference/elasticsearch/mapping-reference/shape.md) type. + + +::::{note} +Unlike the case with the [`geo_bounds`](/reference/data-analysis/aggregations/search-aggregations-metrics-geobounds-aggregation.md#geobounds-aggregation-geo-shape) aggregation, there is no option to set [`wrap_longitude`](/reference/data-analysis/aggregations/search-aggregations-metrics-geobounds-aggregation.md#geo-bounds-wrap-longitude). This is because the cartesian space is euclidean and does not wrap back on itself. So the bounds will always have a minimum x value less than or equal to the maximum x value. +:::: + + +The above aggregation demonstrates how one would compute the bounding box of the location field for all documents with a name matching "musée". + +The response for the above aggregation: + +```console-result +{ + ... + "aggregations": { + "viewport": { + "bounds": { + "top_left": { + "x": 232.6999969482422, + "y": 4886.111328125 + }, + "bottom_right": { + "x": 233.63890075683594, + "y": 4886.0 + } + } + } + } +} +``` + + +## Cartesian Bounds Aggregation on `shape` fields [cartesian-bounds-aggregation-shape] + +The Cartesian Bounds Aggregation is also supported on `cartesian_shape` fields. 
+ +Example: + +```console +PUT /places +{ + "mappings": { + "properties": { + "geometry": { + "type": "shape" + } + } + } +} + +POST /places/_bulk?refresh +{"index":{"_id":1}} +{"name": "NEMO Science Museum", "geometry": "POINT(491.2350 5237.4081)" } +{"index":{"_id":2}} +{"name": "Sportpark De Weeren", "geometry": { "type": "Polygon", "coordinates": [ [ [ 496.5305328369141, 5239.347642069457 ], [ 496.6979026794433, 5239.1721758934835 ], [ 496.9425201416015, 5239.238958618537 ], [ 496.7944622039794, 5239.420969150824 ], [ 496.5305328369141, 5239.347642069457 ] ] ] } } + +POST /places/_search?size=0 +{ + "aggs": { + "viewport": { + "cartesian_bounds": { + "field": "geometry" + } + } + } +} +``` + +```console-result +{ + ... + "aggregations": { + "viewport": { + "bounds": { + "top_left": { + "x": 491.2349853515625, + "y": 5239.4208984375 + }, + "bottom_right": { + "x": 496.9425048828125, + "y": 5237.408203125 + } + } + } + } +} +``` + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-cartesian-centroid-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-cartesian-centroid-aggregation.md new file mode 100644 index 0000000000000..cc9b820897496 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-cartesian-centroid-aggregation.md @@ -0,0 +1,203 @@ +--- +navigation_title: "Cartesian-centroid" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cartesian-centroid-aggregation.html +--- + +# Cartesian-centroid aggregation [search-aggregations-metrics-cartesian-centroid-aggregation] + + +A metric aggregation that computes the weighted [centroid](https://en.wikipedia.org/wiki/Centroid) from all coordinate values for point and shape fields. + +Example: + +```console +PUT /museums +{ + "mappings": { + "properties": { + "location": { + "type": "point" + } + } + } +} + +POST /museums/_bulk?refresh +{"index":{"_id":1}} +{"location": "POINT (491.2350 5237.4081)", "city": "Amsterdam", "name": "NEMO Science Museum"} +{"index":{"_id":2}} +{"location": "POINT (490.1618 5236.9219)", "city": "Amsterdam", "name": "Museum Het Rembrandthuis"} +{"index":{"_id":3}} +{"location": "POINT (491.4722 5237.1667)", "city": "Amsterdam", "name": "Nederlands Scheepvaartmuseum"} +{"index":{"_id":4}} +{"location": "POINT (440.5200 5122.2900)", "city": "Antwerp", "name": "Letterenhuis"} +{"index":{"_id":5}} +{"location": "POINT (233.6389 4886.1111)", "city": "Paris", "name": "Musée du Louvre"} +{"index":{"_id":6}} +{"location": "POINT (232.7000 4886.0000)", "city": "Paris", "name": "Musée d'Orsay"} + +POST /museums/_search?size=0 +{ + "aggs": { + "centroid": { + "cartesian_centroid": { + "field": "location" <1> + } + } + } +} +``` + +1. The `cartesian_centroid` aggregation specifies the field to use for computing the centroid, which must be a [Point](/reference/elasticsearch/mapping-reference/point.md) or a [Shape](/reference/elasticsearch/mapping-reference/shape.md) type. + + +The above aggregation demonstrates how one would compute the centroid of the location field for all museums' documents. + +The response for the above aggregation: + +```console-result +{ + ... + "aggregations": { + "centroid": { + "location": { + "x": 396.6213124593099, + "y": 5100.982991536458 + }, + "count": 6 + } + } +} +``` + +The `cartesian_centroid` aggregation is more interesting when combined as a sub-aggregation to other bucket aggregations. 
+ +Example: + +```console +POST /museums/_search?size=0 +{ + "aggs": { + "cities": { + "terms": { "field": "city.keyword" }, + "aggs": { + "centroid": { + "cartesian_centroid": { "field": "location" } + } + } + } + } +} +``` + +The above example uses `cartesian_centroid` as a sub-aggregation to a [terms](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md) bucket aggregation for finding the central location for museums in each city. + +The response for the above aggregation: + +```console-result +{ + ... + "aggregations": { + "cities": { + "sum_other_doc_count": 0, + "doc_count_error_upper_bound": 0, + "buckets": [ + { + "key": "Amsterdam", + "doc_count": 3, + "centroid": { + "location": { + "x": 490.9563293457031, + "y": 5237.16552734375 + }, + "count": 3 + } + }, + { + "key": "Paris", + "doc_count": 2, + "centroid": { + "location": { + "x": 233.16944885253906, + "y": 4886.0556640625 + }, + "count": 2 + } + }, + { + "key": "Antwerp", + "doc_count": 1, + "centroid": { + "location": { + "x": 440.5199890136719, + "y": 5122.2900390625 + }, + "count": 1 + } + } + ] + } + } +} +``` + + +## Cartesian Centroid Aggregation on `shape` fields [cartesian-centroid-aggregation-geo-shape] + +The centroid metric for shapes is more nuanced than for points. The centroid of a specific aggregation bucket containing shapes is the centroid of the highest-dimensionality shape type in the bucket. For example, if a bucket contains shapes consisting of polygons and lines, then the lines do not contribute to the centroid metric. Each type of shape’s centroid is calculated differently. Envelopes and circles ingested via the [Circle](/reference/ingestion-tools/enrich-processor/ingest-circle-processor.md) are treated as polygons. + +| Geometry Type | Centroid Calculation | +| --- | --- | +| [Multi]Point | equally weighted average of all the coordinates | +| [Multi]LineString | a weighted average of all the centroids of each segment, where the weight of each segment is its length in the same units as the coordinates | +| [Multi]Polygon | a weighted average of all the centroids of all the triangles of a polygon where the triangles are formed by every two consecutive vertices and the starting-point.holes have negative weights. weights represent the area of the triangle is calculated in the square of the units of the coordinates | +| GeometryCollection | The centroid of all the underlying geometries with the highest dimension. If Polygons and Lines and/or Points, then lines and/or points are ignored.If Lines and Points, then points are ignored | + +Example: + +```console +PUT /places +{ + "mappings": { + "properties": { + "geometry": { + "type": "shape" + } + } + } +} + +POST /places/_bulk?refresh +{"index":{"_id":1}} +{"name": "NEMO Science Museum", "geometry": "POINT(491.2350 5237.4081)" } +{"index":{"_id":2}} +{"name": "Sportpark De Weeren", "geometry": { "type": "Polygon", "coordinates": [ [ [ 496.5305328369141, 5239.347642069457 ], [ 496.6979026794433, 5239.1721758934835 ], [ 496.9425201416015, 5239.238958618537 ], [ 496.7944622039794, 5239.420969150824 ], [ 496.5305328369141, 5239.347642069457 ] ] ] } } + +POST /places/_search?size=0 +{ + "aggs": { + "centroid": { + "cartesian_centroid": { + "field": "geometry" + } + } + } +} +``` + +```console-result +{ + ... 
+ "aggregations": { + "centroid": { + "location": { + "x": 496.74041748046875, + "y": 5239.29638671875 + }, + "count": 2 + } + } +} +``` + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-extendedstats-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-extendedstats-aggregation.md new file mode 100644 index 0000000000000..ff573b08dd54a --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-extendedstats-aggregation.md @@ -0,0 +1,148 @@ +--- +navigation_title: "Extended stats" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-extendedstats-aggregation.html +--- + +# Extended stats aggregation [search-aggregations-metrics-extendedstats-aggregation] + + +A `multi-value` metrics aggregation that computes stats over numeric values extracted from the aggregated documents. + +The `extended_stats` aggregations is an extended version of the [`stats`](/reference/data-analysis/aggregations/search-aggregations-metrics-stats-aggregation.md) aggregation, where additional metrics are added such as `sum_of_squares`, `variance`, `std_deviation` and `std_deviation_bounds`. + +Assuming the data consists of documents representing exams grades (between 0 and 100) of students + +```console +GET /exams/_search +{ + "size": 0, + "aggs": { + "grades_stats": { "extended_stats": { "field": "grade" } } + } +} +``` + +The above aggregation computes the grades statistics over all documents. The aggregation type is `extended_stats` and the `field` setting defines the numeric field of the documents the stats will be computed on. The above will return the following: + +The `std_deviation` and `variance` are calculated as population metrics so they are always the same as `std_deviation_population` and `variance_population` respectively. + +```console-result +{ + ... + + "aggregations": { + "grades_stats": { + "count": 2, + "min": 50.0, + "max": 100.0, + "avg": 75.0, + "sum": 150.0, + "sum_of_squares": 12500.0, + "variance": 625.0, + "variance_population": 625.0, + "variance_sampling": 1250.0, + "std_deviation": 25.0, + "std_deviation_population": 25.0, + "std_deviation_sampling": 35.35533905932738, + "std_deviation_bounds": { + "upper": 125.0, + "lower": 25.0, + "upper_population": 125.0, + "lower_population": 25.0, + "upper_sampling": 145.71067811865476, + "lower_sampling": 4.289321881345245 + } + } + } +} +``` + +The name of the aggregation (`grades_stats` above) also serves as the key by which the aggregation result can be retrieved from the returned response. + +## Standard Deviation Bounds [_standard_deviation_bounds] + +By default, the `extended_stats` metric will return an object called `std_deviation_bounds`, which provides an interval of plus/minus two standard deviations from the mean. This can be a useful way to visualize variance of your data. If you want a different boundary, for example three standard deviations, you can set `sigma` in the request: + +```console +GET /exams/_search +{ + "size": 0, + "aggs": { + "grades_stats": { + "extended_stats": { + "field": "grade", + "sigma": 3 <1> + } + } + } +} +``` + +1. `sigma` controls how many standard deviations +/- from the mean should be displayed + + +`sigma` can be any non-negative double, meaning you can request non-integer values such as `1.5`. A value of `0` is valid, but will simply return the average for both `upper` and `lower` bounds. 
+ +The `upper` and `lower` bounds are calculated as population metrics so they are always the same as `upper_population` and `lower_population` respectively. + +::::{admonition} Standard Deviation and Bounds require normality +:class: note + +The standard deviation and its bounds are displayed by default, but they are not always applicable to all data-sets. Your data must be normally distributed for the metrics to make sense. The statistics behind standard deviations assumes normally distributed data, so if your data is skewed heavily left or right, the value returned will be misleading. + +:::: + + + +## Script [_script_5] + +If you need to aggregate on a value that isn’t indexed, use a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). Say the we found out that the grades we’ve been working on were for an exam that was above the level of the students and we want to "correct" it: + +```console +GET /exams/_search +{ + "size": 0, + "runtime_mappings": { + "grade.corrected": { + "type": "double", + "script": { + "source": "emit(Math.min(100, doc['grade'].value * params.correction))", + "params": { + "correction": 1.2 + } + } + } + }, + "aggs": { + "grades_stats": { + "extended_stats": { "field": "grade.corrected" } + } + } +} +``` + + +## Missing value [_missing_value_9] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +```console +GET /exams/_search +{ + "size": 0, + "aggs": { + "grades_stats": { + "extended_stats": { + "field": "grade", + "missing": 0 <1> + } + } + } +} +``` + +1. Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `0`. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-geo-line.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-geo-line.md new file mode 100644 index 0000000000000..ca57725ff13de --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-geo-line.md @@ -0,0 +1,396 @@ +--- +navigation_title: "Geo-line" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-geo-line.html +--- + +# Geo-line aggregation [search-aggregations-metrics-geo-line] + + +The `geo_line` aggregation aggregates all `geo_point` values within a bucket into a `LineString` ordered by the chosen `sort` field. This `sort` can be a date field, for example. The bucket returned is a valid [GeoJSON Feature](https://tools.ietf.org/html/rfc7946#section-3.2) representing the line geometry. 
+ +$$$search-aggregations-metrics-geo-line-simple$$$ + +```console +PUT test +{ + "mappings": { + "properties": { + "my_location": { "type": "geo_point" }, + "group": { "type": "keyword" }, + "@timestamp": { "type": "date" } + } + } +} + +POST /test/_bulk?refresh +{"index":{}} +{"my_location": {"lat":52.373184, "lon":4.889187}, "@timestamp": "2023-01-02T09:00:00Z"} +{"index":{}} +{"my_location": {"lat":52.370159, "lon":4.885057}, "@timestamp": "2023-01-02T10:00:00Z"} +{"index":{}} +{"my_location": {"lat":52.369219, "lon":4.901618}, "@timestamp": "2023-01-02T13:00:00Z"} +{"index":{}} +{"my_location": {"lat":52.374081, "lon":4.912350}, "@timestamp": "2023-01-02T16:00:00Z"} +{"index":{}} +{"my_location": {"lat":52.371667, "lon":4.914722}, "@timestamp": "2023-01-03T12:00:00Z"} + +POST /test/_search?filter_path=aggregations +{ + "aggs": { + "line": { + "geo_line": { + "point": {"field": "my_location"}, + "sort": {"field": "@timestamp"} + } + } + } +} +``` + +Which returns: + +```js +{ + "aggregations": { + "line": { + "type": "Feature", + "geometry": { + "type": "LineString", + "coordinates": [ + [ 4.889187, 52.373184 ], + [ 4.885057, 52.370159 ], + [ 4.901618, 52.369219 ], + [ 4.912350, 52.374081 ], + [ 4.914722, 52.371667 ] + ] + }, + "properties": { + "complete": true + } + } + } +} +``` + +The resulting [GeoJSON Feature](https://tools.ietf.org/html/rfc7946#section-3.2) contains both a `LineString` geometry for the path generated by the aggregation, as well as a map of `properties`. The property `complete` informs of whether all documents matched were used to generate the geometry. The [`size` option](#search-aggregations-metrics-geo-line-size) can be used to limit the number of documents included in the aggregation, leading to results with `complete: false`. Exactly which documents are dropped from results [depends on whether the aggregation is based on `time_series` or not](#search-aggregations-metrics-geo-line-grouping-time-series-advantages). + +This result could be displayed in a map user interface: + +![Kibana map with museum tour of Amsterdam](../../../images/geo_line.png "") + +## Options [search-aggregations-metrics-geo-line-options] + +`point` +: (Required) + +This option specifies the name of the `geo_point` field + +Example usage configuring `my_location` as the point field: + +```js +"point": { + "field": "my_location" +} +``` + +`sort` +: (Required outside [`time_series`](#search-aggregations-metrics-geo-line-grouping-time-series) aggregations) + +This option specifies the name of the numeric field to use as the sort key for ordering the points. When the `geo_line` aggregation is nested inside a [`time_series`](#search-aggregations-metrics-geo-line-grouping-time-series) aggregation, this field defaults to `@timestamp`, and any other value will result in error. + +Example usage configuring `@timestamp` as the sort key: + +```js +"sort": { + "field": "@timestamp" +} +``` + +`include_sort` +: (Optional, boolean, default: `false`) This option includes, when true, an additional array of the sort values in the feature properties. + +`sort_order` +: (Optional, string, default: `"ASC"`) This option accepts one of two values: "ASC", "DESC". The line is sorted in ascending order by the sort key when set to "ASC", and in descending with "DESC". + +$$$search-aggregations-metrics-geo-line-size$$$ + +`size` +: (Optional, integer, default: `10000`) The maximum length of the line represented in the aggregation. Valid sizes are between one and 10000. 
Within [`time_series`](#search-aggregations-metrics-geo-line-grouping-time-series) the aggregation uses line simplification to constrain the size, otherwise it uses truncation. Refer to [Why group with time-series?](#search-aggregations-metrics-geo-line-grouping-time-series-advantages) for a discussion on the subtleties involved. + + +## Grouping [search-aggregations-metrics-geo-line-grouping] + +This simple example produces a single track for all the data selected by the query. However, it is far more common to need to group the data into multiple tracks. For example, grouping flight transponder measurements by flight call-sign before sorting each flight by timestamp and producing a separate track for each. + +In the following examples we will group the locations of points of interest in the cities of Amsterdam, Antwerp and Paris. The tracks will be ordered by the planned visit sequence for a walking tour of the museums and others attractions. + +In order to demonstrate the difference between a time-series grouping and a non-time-series grouping, we will first create an index with [time-series enabled](/reference/elasticsearch/index-settings/time-series.md), and then give examples of grouping the same data without time-series and with time-series. + +$$$search-aggregations-metrics-geo-line-grouping-setup$$$ + +```console +PUT tour +{ + "mappings": { + "properties": { + "city": { + "type": "keyword", + "time_series_dimension": true + }, + "category": { "type": "keyword" }, + "route": { "type": "long" }, + "name": { "type": "keyword" }, + "location": { "type": "geo_point" }, + "@timestamp": { "type": "date" } + } + }, + "settings": { + "index": { + "mode": "time_series", + "routing_path": [ "city" ], + "time_series": { + "start_time": "2023-01-01T00:00:00Z", + "end_time": "2024-01-01T00:00:00Z" + } + } + } +} + +POST /tour/_bulk?refresh +{"index":{}} +{"@timestamp": "2023-01-02T09:00:00Z", "route": 0, "location": "POINT(4.889187 52.373184)", "city": "Amsterdam", "category": "Attraction", "name": "Royal Palace Amsterdam"} +{"index":{}} +{"@timestamp": "2023-01-02T10:00:00Z", "route": 1, "location": "POINT(4.885057 52.370159)", "city": "Amsterdam", "category": "Attraction", "name": "The Amsterdam Dungeon"} +{"index":{}} +{"@timestamp": "2023-01-02T13:00:00Z", "route": 2, "location": "POINT(4.901618 52.369219)", "city": "Amsterdam", "category": "Museum", "name": "Museum Het Rembrandthuis"} +{"index":{}} +{"@timestamp": "2023-01-02T16:00:00Z", "route": 3, "location": "POINT(4.912350 52.374081)", "city": "Amsterdam", "category": "Museum", "name": "NEMO Science Museum"} +{"index":{}} +{"@timestamp": "2023-01-03T12:00:00Z", "route": 4, "location": "POINT(4.914722 52.371667)", "city": "Amsterdam", "category": "Museum", "name": "Nederlands Scheepvaartmuseum"} +{"index":{}} +{"@timestamp": "2023-01-04T09:00:00Z", "route": 5, "location": "POINT(4.401384 51.220292)", "city": "Antwerp", "category": "Attraction", "name": "Cathedral of Our Lady"} +{"index":{}} +{"@timestamp": "2023-01-04T12:00:00Z", "route": 6, "location": "POINT(4.405819 51.221758)", "city": "Antwerp", "category": "Museum", "name": "Snijders&Rockoxhuis"} +{"index":{}} +{"@timestamp": "2023-01-04T15:00:00Z", "route": 7, "location": "POINT(4.405200 51.222900)", "city": "Antwerp", "category": "Museum", "name": "Letterenhuis"} +{"index":{}} +{"@timestamp": "2023-01-05T10:00:00Z", "route": 8, "location": "POINT(2.336389 48.861111)", "city": "Paris", "category": "Museum", "name": "Musée du Louvre"} +{"index":{}} +{"@timestamp": 
"2023-01-05T14:00:00Z", "route": 9, "location": "POINT(2.327000 48.860000)", "city": "Paris", "category": "Museum", "name": "Musée dOrsay"} +``` + + +## Grouping with terms [search-aggregations-metrics-geo-line-grouping-terms] + +Using this data, for a non-time-series use case, the grouping can be done using a [terms aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md) based on city name. This would work whether or not we had defined the `tour` index as a time series index. + +$$$search-aggregations-metrics-geo-line-terms$$$ + +```console +POST /tour/_search?filter_path=aggregations +{ + "aggregations": { + "path": { + "terms": {"field": "city"}, + "aggregations": { + "museum_tour": { + "geo_line": { + "point": {"field": "location"}, + "sort": {"field": "@timestamp"} + } + } + } + } + } +} +``` + +Which returns: + +```js +{ + "aggregations": { + "path": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "Amsterdam", + "doc_count": 5, + "museum_tour": { + "type": "Feature", + "geometry": { + "coordinates": [ [ 4.889187, 52.373184 ], [ 4.885057, 52.370159 ], [ 4.901618, 52.369219 ], [ 4.91235, 52.374081 ], [ 4.914722, 52.371667 ] ], + "type": "LineString" + }, + "properties": { + "complete": true + } + } + }, + { + "key": "Antwerp", + "doc_count": 3, + "museum_tour": { + "type": "Feature", + "geometry": { + "coordinates": [ [ 4.401384, 51.220292 ], [ 4.405819, 51.221758 ], [ 4.4052, 51.2229 ] ], + "type": "LineString" + }, + "properties": { + "complete": true + } + } + }, + { + "key": "Paris", + "doc_count": 2, + "museum_tour": { + "type": "Feature", + "geometry": { + "coordinates": [ [ 2.336389, 48.861111 ], [ 2.327, 48.86 ] ], + "type": "LineString" + }, + "properties": { + "complete": true + } + } + } + ] + } + } +} +``` + +These results contain an array of buckets, where each bucket is a JSON object with the `key` showing the name of the `city` field, and an inner aggregation result called `museum_tour` containing a [GeoJSON Feature](https://tools.ietf.org/html/rfc7946#section-3.2) describing the actual route between the various attractions in that city. Each result also includes a `properties` object with a `complete` value which will be `false` if the geometry was truncated to the limits specified in the `size` parameter. Note that when we use `time_series` in the next example, we will get the same results structured a little differently. + + +## Grouping with time-series [search-aggregations-metrics-geo-line-grouping-time-series] + +::::{warning} +This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features. +:::: + + +Using the same data as before, we can also perform the grouping with a [`time_series` aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-time-series-aggregation.md). This will group by TSID, which is defined as the combinations of all fields with `time_series_dimension: true`, in this case the same `city` field used in the previous [terms aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md). This example will only work if we defined the `tour` index as a time series index using `index.mode="time_series"`. 
+ +$$$search-aggregations-metrics-geo-line-time-series$$$ + +```console +POST /tour/_search?filter_path=aggregations +{ + "aggregations": { + "path": { + "time_series": {}, + "aggregations": { + "museum_tour": { + "geo_line": { + "point": {"field": "location"} + } + } + } + } + } +} +``` + +::::{note} +The `geo_line` aggregation no longer requires the `sort` field when nested within a [`time_series` aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-time-series-aggregation.md). This is because the sort field is set to `@timestamp`, which all time-series indexes are pre-sorted by. If you do set this parameter, and set it to something other than `@timestamp` you will get an error. +:::: + + +This query will result in: + +```js +{ + "aggregations": { + "path": { + "buckets": { + "{city=Paris}": { + "key": { + "city": "Paris" + }, + "doc_count": 2, + "museum_tour": { + "type": "Feature", + "geometry": { + "coordinates": [ [ 2.336389, 48.861111 ], [ 2.327, 48.86 ] ], + "type": "LineString" + }, + "properties": { + "complete": true + } + } + }, + "{city=Antwerp}": { + "key": { + "city": "Antwerp" + }, + "doc_count": 3, + "museum_tour": { + "type": "Feature", + "geometry": { + "coordinates": [ [ 4.401384, 51.220292 ], [ 4.405819, 51.221758 ], [ 4.4052, 51.2229 ] ], + "type": "LineString" + }, + "properties": { + "complete": true + } + } + }, + "{city=Amsterdam}": { + "key": { + "city": "Amsterdam" + }, + "doc_count": 5, + "museum_tour": { + "type": "Feature", + "geometry": { + "coordinates": [ [ 4.889187, 52.373184 ], [ 4.885057, 52.370159 ], [ 4.901618, 52.369219 ], [ 4.91235, 52.374081 ], [ 4.914722, 52.371667 ] ], + "type": "LineString" + }, + "properties": { + "complete": true + } + } + } + } + } + } +} +``` + +These results are essentially the same as with the previous `terms` aggregation example, but structured differently. Here we see the buckets returned as a map, where the key is an internal description of the TSID. This TSID is unique for each unique combination of fields with `time_series_dimension: true`. Each bucket contains a `key` field which is also a map of all dimension values for the TSID, in this case only the city name is used for grouping. In addition, there is an inner aggregation result called `museum_tour` containing a [GeoJSON Feature](https://tools.ietf.org/html/rfc7946#section-3.2) describing the actual route between the various attractions in that city. Each result also includes a `properties` object with a `complete` value which will be false if the geometry was simplified to the limits specified in the `size` parameter. + + +## Why group with time-series? [search-aggregations-metrics-geo-line-grouping-time-series-advantages] + +When reviewing these examples, you might think that there is little difference between using [`terms`](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md) or [`time_series`](/reference/data-analysis/aggregations/search-aggregations-bucket-time-series-aggregation.md) to group the geo-lines. However, there are some important differences in behaviour between the two cases. Time series indexes are stored in a very specific order on disk. They are pre-grouped by the time-series dimension fields, and pre-sorted by the `@timestamp` field. This allows the `geo_line` aggregation to be considerably optimized: + +* The same memory allocated for the first bucket can be re-used over and over for all subsequent buckets. 
This is substantially less memory than required for non-time-series cases where all buckets are collected concurrently.
+* No sorting needs to be done, since the data is pre-sorted by `@timestamp`. The time-series data will naturally arrive at the aggregation collector in `DESC` order. This means that if we specify `sort_order:ASC` (the default), we still collect in `DESC` order, but perform an efficient in-memory reverse before generating the final `LineString` geometry.
+* The `size` parameter can be used for a streaming line-simplification algorithm. Without time-series, we are forced to truncate data, by default after 10000 documents per bucket, in order to prevent memory usage from being unbounded. This can result in geo-lines being truncated, and therefore losing important data. With time-series we can run a streaming line-simplification algorithm, retaining control over memory usage, while also maintaining the overall geometry shape. In fact, for most use cases it would work to set this `size` parameter to a much lower bound, and save even more memory. For example, if the `geo_line` is to be drawn on a display map with a specific resolution, it might look just as good to simplify to as few as 100 or 200 points. This will save memory on the server, on the network and in the client.
+
+Note: There are other significant advantages to working with time-series data and using `time_series` index mode. These are discussed in the documentation on [time series data streams](docs-content://manage-data/data-store/data-streams/time-series-data-stream-tsds.md).
+
+
+## Streaming line simplification [search-aggregations-metrics-geo-line-simplification]
+
+Line simplification is a great way to reduce the size of the final results sent to the client, and displayed in a map user interface. However, these algorithms normally use a lot of memory to perform the simplification, requiring the entire geometry to be maintained in memory together with supporting data for the simplification itself. The use of a streaming line simplification algorithm allows for minimal memory usage during the simplification process by constraining memory to the bounds defined for the simplified geometry. This is only possible if no sorting is required, which is the case when grouping is done by the [`time_series` aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-time-series-aggregation.md), running on an index with the `time_series` index mode.
+
+Under these conditions the `geo_line` aggregation allocates memory to the `size` specified, and then fills that memory with the incoming documents. Once the memory is completely filled, documents from within the line are removed as new documents are added. The choice of document to remove is made to minimize the visual impact on the geometry. This process makes use of the [Visvalingam–Whyatt algorithm](https://en.wikipedia.org/wiki/Visvalingam%E2%80%93Whyatt_algorithm). Essentially this means points are removed if they have the minimum triangle area, with the triangle defined by the point under consideration and the two points before and after it in the line. In addition, we calculate the area using spherical coordinates so that no planar distortions affect the choice.
+
+In order to demonstrate how much better line simplification is than line truncation, consider this example of the north shore of Kodiak Island. The data for this is only 209 points, but if we want to set `size` to `100` we get dramatic truncation.
+ +![North short of Kodiak Island truncated to 100 points](../../../images/kodiak_geo_line_truncated.png "") + +The grey line is the entire geometry of 209 points, while the blue line is the first 100 points, a very different geometry than the original. + +Now consider the same geometry simplified to 100 points. + +![North short of Kodiak Island simplified to 100 points](../../../images/kodiak_geo_line_simplified.png "") + +For comparison we have shown the original in grey, the truncated in blue and the new simplified geometry in magenta. It is possible to see where the new simplified line deviates from the original, but the overall geometry appears almost identical and is still clearly recognizable as the north shore of Kodiak Island. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-geobounds-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-geobounds-aggregation.md new file mode 100644 index 0000000000000..ff6cfe8a8173c --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-geobounds-aggregation.md @@ -0,0 +1,144 @@ +--- +navigation_title: "Geo-bounds" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-geobounds-aggregation.html +--- + +# Geo-bounds aggregation [search-aggregations-metrics-geobounds-aggregation] + + +A metric aggregation that computes the geographic bounding box containing all values for a [Geopoint](/reference/elasticsearch/mapping-reference/geo-point.md) or [Geoshape](/reference/elasticsearch/mapping-reference/geo-shape.md) field. + +Example: + +```console +PUT /museums +{ + "mappings": { + "properties": { + "location": { + "type": "geo_point" + } + } + } +} + +POST /museums/_bulk?refresh +{"index":{"_id":1}} +{"location": "POINT (4.912350 52.374081)", "name": "NEMO Science Museum"} +{"index":{"_id":2}} +{"location": "POINT (4.901618 52.369219)", "name": "Museum Het Rembrandthuis"} +{"index":{"_id":3}} +{"location": "POINT (4.914722 52.371667)", "name": "Nederlands Scheepvaartmuseum"} +{"index":{"_id":4}} +{"location": "POINT (4.405200 51.222900)", "name": "Letterenhuis"} +{"index":{"_id":5}} +{"location": "POINT (2.336389 48.861111)", "name": "Musée du Louvre"} +{"index":{"_id":6}} +{"location": "POINT (2.327000 48.860000)", "name": "Musée d'Orsay"} + +POST /museums/_search?size=0 +{ + "query": { + "match": { "name": "musée" } + }, + "aggs": { + "viewport": { + "geo_bounds": { + "field": "location", <1> + "wrap_longitude": true <2> + } + } + } +} +``` + +1. The `geo_bounds` aggregation specifies the field to use to obtain the bounds, which must be a [Geopoint](/reference/elasticsearch/mapping-reference/geo-point.md) or a [Geoshape](/reference/elasticsearch/mapping-reference/geo-shape.md) type. +2. $$$geo-bounds-wrap-longitude$$$ `wrap_longitude` is an optional parameter which specifies whether the bounding box should be allowed to overlap the international date line. The default value is `true`. + + +The above aggregation demonstrates how one would compute the bounding box of the location field for all documents with a name matching "musée". + +The response for the above aggregation: + +```console-result +{ + ... 
+ "aggregations": { + "viewport": { + "bounds": { + "top_left": { + "lat": 48.86111099738628, + "lon": 2.3269999679178 + }, + "bottom_right": { + "lat": 48.85999997612089, + "lon": 2.3363889567553997 + } + } + } + } +} +``` + + +## Geo Bounds Aggregation on `geo_shape` fields [geobounds-aggregation-geo-shape] + +The Geo Bounds Aggregation is also supported on `geo_shape` fields. + +If [`wrap_longitude`](#geo-bounds-wrap-longitude) is set to `true` (the default), the bounding box can overlap the international date line and return a bounds where the `top_left` longitude is larger than the `top_right` longitude. + +For example, the upper right longitude will typically be greater than the lower left longitude of a geographic bounding box. However, when the area crosses the 180° meridian, the value of the lower left longitude will be greater than the value of the upper right longitude. See [Geographic bounding box](http://docs.opengeospatial.org/is/12-063r5/12-063r5.md#30) on the Open Geospatial Consortium website for more information. + +Example: + +```console +PUT /places +{ + "mappings": { + "properties": { + "geometry": { + "type": "geo_shape" + } + } + } +} + +POST /places/_bulk?refresh +{"index":{"_id":1}} +{"name": "NEMO Science Museum", "geometry": "POINT(4.912350 52.374081)" } +{"index":{"_id":2}} +{"name": "Sportpark De Weeren", "geometry": { "type": "Polygon", "coordinates": [ [ [ 4.965305328369141, 52.39347642069457 ], [ 4.966979026794433, 52.391721758934835 ], [ 4.969425201416015, 52.39238958618537 ], [ 4.967944622039794, 52.39420969150824 ], [ 4.965305328369141, 52.39347642069457 ] ] ] } } + +POST /places/_search?size=0 +{ + "aggs": { + "viewport": { + "geo_bounds": { + "field": "geometry" + } + } + } +} +``` + +```console-result +{ + ... + "aggregations": { + "viewport": { + "bounds": { + "top_left": { + "lat": 52.39420966710895, + "lon": 4.912349972873926 + }, + "bottom_right": { + "lat": 52.374080987647176, + "lon": 4.969425117596984 + } + } + } + } +} +``` + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-geocentroid-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-geocentroid-aggregation.md new file mode 100644 index 0000000000000..eb8ad47acbeea --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-geocentroid-aggregation.md @@ -0,0 +1,213 @@ +--- +navigation_title: "Geo-centroid" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-geocentroid-aggregation.html +--- + +# Geo-centroid aggregation [search-aggregations-metrics-geocentroid-aggregation] + + +A metric aggregation that computes the weighted [centroid](https://en.wikipedia.org/wiki/Centroid) from all coordinate values for geo fields. 
+ +Example: + +```console +PUT /museums +{ + "mappings": { + "properties": { + "location": { + "type": "geo_point" + } + } + } +} + +POST /museums/_bulk?refresh +{"index":{"_id":1}} +{"location": "POINT (4.912350 52.374081)", "city": "Amsterdam", "name": "NEMO Science Museum"} +{"index":{"_id":2}} +{"location": "POINT (4.901618 52.369219)", "city": "Amsterdam", "name": "Museum Het Rembrandthuis"} +{"index":{"_id":3}} +{"location": "POINT (4.914722 52.371667)", "city": "Amsterdam", "name": "Nederlands Scheepvaartmuseum"} +{"index":{"_id":4}} +{"location": "POINT (4.405200 51.222900)", "city": "Antwerp", "name": "Letterenhuis"} +{"index":{"_id":5}} +{"location": "POINT (2.336389 48.861111)", "city": "Paris", "name": "Musée du Louvre"} +{"index":{"_id":6}} +{"location": "POINT (2.327000 48.860000)", "city": "Paris", "name": "Musée d'Orsay"} + +POST /museums/_search?size=0 +{ + "aggs": { + "centroid": { + "geo_centroid": { + "field": "location" <1> + } + } + } +} +``` + +1. The `geo_centroid` aggregation specifies the field to use for computing the centroid. (NOTE: field must be a [Geopoint](/reference/elasticsearch/mapping-reference/geo-point.md) type) + + +The above aggregation demonstrates how one would compute the centroid of the location field for all museums' documents. + +The response for the above aggregation: + +```console-result +{ + ... + "aggregations": { + "centroid": { + "location": { + "lat": 51.00982965203002, + "lon": 3.9662131341174245 + }, + "count": 6 + } + } +} +``` + +The `geo_centroid` aggregation is more interesting when combined as a sub-aggregation to other bucket aggregations. + +Example: + +```console +POST /museums/_search?size=0 +{ + "aggs": { + "cities": { + "terms": { "field": "city.keyword" }, + "aggs": { + "centroid": { + "geo_centroid": { "field": "location" } + } + } + } + } +} +``` + +The above example uses `geo_centroid` as a sub-aggregation to a [terms](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md) bucket aggregation for finding the central location for museums in each city. + +The response for the above aggregation: + +```console-result +{ + ... + "aggregations": { + "cities": { + "sum_other_doc_count": 0, + "doc_count_error_upper_bound": 0, + "buckets": [ + { + "key": "Amsterdam", + "doc_count": 3, + "centroid": { + "location": { + "lat": 52.371655656024814, + "lon": 4.909563297405839 + }, + "count": 3 + } + }, + { + "key": "Paris", + "doc_count": 2, + "centroid": { + "location": { + "lat": 48.86055548675358, + "lon": 2.3316944623366 + }, + "count": 2 + } + }, + { + "key": "Antwerp", + "doc_count": 1, + "centroid": { + "location": { + "lat": 51.22289997059852, + "lon": 4.40519998781383 + }, + "count": 1 + } + } + ] + } + } +} +``` + + +## Geo Centroid Aggregation on `geo_shape` fields [geocentroid-aggregation-geo-shape] + +The centroid metric for geoshapes is more nuanced than for points. The centroid of a specific aggregation bucket containing shapes is the centroid of the highest-dimensionality shape type in the bucket. For example, if a bucket contains shapes comprising of polygons and lines, then the lines do not contribute to the centroid metric. Each type of shape’s centroid is calculated differently. Envelopes and circles ingested via the [Circle](/reference/ingestion-tools/enrich-processor/ingest-circle-processor.md) are treated as polygons. 
+ +| Geometry Type | Centroid Calculation | +| --- | --- | +| [Multi]Point | equally weighted average of all the coordinates | +| [Multi]LineString | a weighted average of all the centroids of each segment, where the weight of each segment is its length in degrees | +| [Multi]Polygon | a weighted average of all the centroids of all the triangles of a polygon where the triangles are formed by every two consecutive vertices and the starting-point. holes have negative weights. weights represent the area of the triangle in deg^2 calculated | +| GeometryCollection | The centroid of all the underlying geometries with the highest dimension. If Polygons and Lines and/or Points, then lines and/or points are ignored. If Lines and Points, then points are ignored | + +Example: + +```console +PUT /places +{ + "mappings": { + "properties": { + "geometry": { + "type": "geo_shape" + } + } + } +} + +POST /places/_bulk?refresh +{"index":{"_id":1}} +{"name": "NEMO Science Museum", "geometry": "POINT(4.912350 52.374081)" } +{"index":{"_id":2}} +{"name": "Sportpark De Weeren", "geometry": { "type": "Polygon", "coordinates": [ [ [ 4.965305328369141, 52.39347642069457 ], [ 4.966979026794433, 52.391721758934835 ], [ 4.969425201416015, 52.39238958618537 ], [ 4.967944622039794, 52.39420969150824 ], [ 4.965305328369141, 52.39347642069457 ] ] ] } } + +POST /places/_search?size=0 +{ + "aggs": { + "centroid": { + "geo_centroid": { + "field": "geometry" + } + } + } +} +``` + +```console-result +{ + ... + "aggregations": { + "centroid": { + "location": { + "lat": 52.39296147599816, + "lon": 4.967404240742326 + }, + "count": 2 + } + } +} +``` + +::::{admonition} Using `geo_centroid` as a sub-aggregation of `geohash_grid` +:class: warning + +The [`geohash_grid`](/reference/data-analysis/aggregations/search-aggregations-bucket-geohashgrid-aggregation.md) aggregation places documents, not individual geopoints, into buckets. If a document’s `geo_point` field contains [multiple values](/reference/elasticsearch/mapping-reference/array.md), the document could be assigned to multiple buckets, even if one or more of its geopoints are outside the bucket boundaries. + +If a `geocentroid` sub-aggregation is also used, each centroid is calculated using all geopoints in a bucket, including those outside the bucket boundaries. This can result in centroids outside of bucket boundaries. + +:::: + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-max-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-max-aggregation.md new file mode 100644 index 0000000000000..9c2776976b973 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-max-aggregation.md @@ -0,0 +1,148 @@ +--- +navigation_title: "Max" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-max-aggregation.html +--- + +# Max aggregation [search-aggregations-metrics-max-aggregation] + + +A `single-value` metrics aggregation that keeps track and returns the maximum value among the numeric values extracted from the aggregated documents. + +::::{note} +The `min` and `max` aggregation operate on the `double` representation of the data. As a consequence, the result may be approximate when running on longs whose absolute value is greater than `2^53`. 
+:::: + + +Computing the max price value across all documents + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "max_price": { "max": { "field": "price" } } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "max_price": { + "value": 200.0 + } + } +} +``` + +As can be seen, the name of the aggregation (`max_price` above) also serves as the key by which the aggregation result can be retrieved from the returned response. + +## Script [_script_6] + +If you need to get the `max` of something more complex than a single field, run an aggregation on a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). + +```console +POST /sales/_search +{ + "size": 0, + "runtime_mappings": { + "price.adjusted": { + "type": "double", + "script": """ + double price = doc['price'].value; + if (doc['promoted'].value) { + price *= 0.8; + } + emit(price); + """ + } + }, + "aggs": { + "max_price": { + "max": { "field": "price.adjusted" } + } + } +} +``` + + +## Missing value [_missing_value_10] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +```console +POST /sales/_search +{ + "aggs" : { + "grade_max" : { + "max" : { + "field" : "grade", + "missing": 10 <1> + } + } + } +} +``` + +1. Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`. + + + +## Histogram fields [search-aggregations-metrics-max-aggregation-histogram-fields] + +When `max` is computed on [histogram fields](/reference/elasticsearch/mapping-reference/histogram.md), the result of the aggregation is the maximum of all elements in the `values` array. Note, that the `counts` array of the histogram is ignored. 
+ +For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks: + +```console +PUT metrics_index +{ + "mappings": { + "properties": { + "latency_histo": { "type": "histogram" } + } + } +} + +PUT metrics_index/_doc/1?refresh +{ + "network.name" : "net-1", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], + "counts" : [3, 7, 23, 12, 6] + } +} + +PUT metrics_index/_doc/2?refresh +{ + "network.name" : "net-2", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], + "counts" : [8, 17, 8, 7, 6] + } +} + +POST /metrics_index/_search?size=0&filter_path=aggregations +{ + "aggs" : { + "max_latency" : { "max" : { "field" : "latency_histo" } } + } +} +``` + +The `max` aggregation will return the maximum value of all histogram fields: + +```console-result +{ + "aggregations": { + "max_latency": { + "value": 0.5 + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-median-absolute-deviation-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-median-absolute-deviation-aggregation.md new file mode 100644 index 0000000000000..3926373e337cc --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-median-absolute-deviation-aggregation.md @@ -0,0 +1,156 @@ +--- +navigation_title: "Median absolute deviation" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-median-absolute-deviation-aggregation.html +--- + +# Median absolute deviation aggregation [search-aggregations-metrics-median-absolute-deviation-aggregation] + + +This `single-value` aggregation approximates the [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) of its search results. + +Median absolute deviation is a measure of variability. It is a robust statistic, meaning that it is useful for describing data that may have outliers, or may not be normally distributed. For such data it can be more descriptive than standard deviation. + +It is calculated as the median of each data point’s deviation from the median of the entire sample. That is, for a random variable X, the median absolute deviation is median(|median(X) - Xi|). + +## Example [_example_5] + +Assume our data represents product reviews on a one to five star scale. Such reviews are usually summarized as a mean, which is easily understandable but doesn’t describe the reviews' variability. Estimating the median absolute deviation can provide insight into how much reviews vary from one another. + +In this example we have a product which has an average rating of 3 stars. Let’s look at its ratings' median absolute deviation to determine how much they vary + +```console +GET reviews/_search +{ + "size": 0, + "aggs": { + "review_average": { + "avg": { + "field": "rating" + } + }, + "review_variability": { + "median_absolute_deviation": { + "field": "rating" <1> + } + } + } +} +``` + +1. `rating` must be a numeric field + + +The resulting median absolute deviation of `2` tells us that there is a fair amount of variability in the ratings. Reviewers must have diverse opinions about this product. + +```console-result +{ + ... 
+ "aggregations": { + "review_average": { + "value": 3.0 + }, + "review_variability": { + "value": 2.0 + } + } +} +``` + + +## Approximation [_approximation] + +The naive implementation of calculating median absolute deviation stores the entire sample in memory, so this aggregation instead calculates an approximation. It uses the [TDigest data structure](https://github.com/tdunning/t-digest) to approximate the sample median and the median of deviations from the sample median. For more about the approximation characteristics of TDigests, see [Percentiles are (usually) approximate](/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md#search-aggregations-metrics-percentile-aggregation-approximation). + +The tradeoff between resource usage and accuracy of a TDigest’s quantile approximation, and therefore the accuracy of this aggregation’s approximation of median absolute deviation, is controlled by the `compression` parameter. A higher `compression` setting provides a more accurate approximation at the cost of higher memory usage. For more about the characteristics of the TDigest `compression` parameter see [Compression](/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md#search-aggregations-metrics-percentile-aggregation-compression). + +```console +GET reviews/_search +{ + "size": 0, + "aggs": { + "review_variability": { + "median_absolute_deviation": { + "field": "rating", + "compression": 100 + } + } + } +} +``` + +The default `compression` value for this aggregation is `1000`. At this compression level this aggregation is usually within 5% of the exact result, but observed performance will depend on the sample data. + + +## Script [_script_7] + +In the example above, product reviews are on a scale of one to five. If you want to modify them to a scale of one to ten, use a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). + +```console +GET reviews/_search?filter_path=aggregations +{ + "size": 0, + "runtime_mappings": { + "rating.out_of_ten": { + "type": "long", + "script": { + "source": "emit(doc['rating'].value * params.scaleFactor)", + "params": { + "scaleFactor": 2 + } + } + } + }, + "aggs": { + "review_average": { + "avg": { + "field": "rating.out_of_ten" + } + }, + "review_variability": { + "median_absolute_deviation": { + "field": "rating.out_of_ten" + } + } + } +} +``` + +Which will result in: + +```console-result +{ + "aggregations": { + "review_average": { + "value": 6.0 + }, + "review_variability": { + "value": 4.0 + } + } +} +``` + + +## Missing value [_missing_value_11] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +Let’s be optimistic and assume some reviewers loved the product so much that they forgot to give it a rating. 
We’ll assign them five stars + +```console +GET reviews/_search +{ + "size": 0, + "aggs": { + "review_variability": { + "median_absolute_deviation": { + "field": "rating", + "missing": 5 + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-min-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-min-aggregation.md new file mode 100644 index 0000000000000..2c313a3d2dbef --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-min-aggregation.md @@ -0,0 +1,149 @@ +--- +navigation_title: "Min" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-min-aggregation.html +--- + +# Min aggregation [search-aggregations-metrics-min-aggregation] + + +A `single-value` metrics aggregation that keeps track and returns the minimum value among numeric values extracted from the aggregated documents. + +::::{note} +The `min` and `max` aggregation operate on the `double` representation of the data. As a consequence, the result may be approximate when running on longs whose absolute value is greater than `2^53`. +:::: + + +Computing the min price value across all documents: + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "min_price": { "min": { "field": "price" } } + } +} +``` + +Response: + +```console-result +{ + ... + + "aggregations": { + "min_price": { + "value": 10.0 + } + } +} +``` + +As can be seen, the name of the aggregation (`min_price` above) also serves as the key by which the aggregation result can be retrieved from the returned response. + +## Script [_script_8] + +If you need to get the `min` of something more complex than a single field, run the aggregation on a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). + +```console +POST /sales/_search +{ + "size": 0, + "runtime_mappings": { + "price.adjusted": { + "type": "double", + "script": """ + double price = doc['price'].value; + if (doc['promoted'].value) { + price *= 0.8; + } + emit(price); + """ + } + }, + "aggs": { + "min_price": { + "min": { "field": "price.adjusted" } + } + } +} +``` + + +## Missing value [_missing_value_12] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +```console +POST /sales/_search +{ + "aggs": { + "grade_min": { + "min": { + "field": "grade", + "missing": 10 <1> + } + } + } +} +``` + +1. Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`. + + + +## Histogram fields [search-aggregations-metrics-min-aggregation-histogram-fields] + +When `min` is computed on [histogram fields](/reference/elasticsearch/mapping-reference/histogram.md), the result of the aggregation is the minimum of all elements in the `values` array. Note, that the `counts` array of the histogram is ignored. 
+ +For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks: + +```console +PUT metrics_index +{ + "mappings": { + "properties": { + "latency_histo": { "type": "histogram" } + } + } +} + +PUT metrics_index/_doc/1?refresh +{ + "network.name" : "net-1", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], + "counts" : [3, 7, 23, 12, 6] + } +} + +PUT metrics_index/_doc/2?refresh +{ + "network.name" : "net-2", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], + "counts" : [8, 17, 8, 7, 6] + } +} + +POST /metrics_index/_search?size=0&filter_path=aggregations +{ + "aggs" : { + "min_latency" : { "min" : { "field" : "latency_histo" } } + } +} +``` + +The `min` aggregation will return the minimum value of all histogram fields: + +```console-result +{ + "aggregations": { + "min_latency": { + "value": 0.1 + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md new file mode 100644 index 0000000000000..1d9c78d0991a1 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md @@ -0,0 +1,314 @@ +--- +navigation_title: "Percentiles" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-aggregation.html +--- + +# Percentiles aggregation [search-aggregations-metrics-percentile-aggregation] + + +A `multi-value` metrics aggregation that calculates one or more percentiles over numeric values extracted from the aggregated documents. These values can be extracted from specific numeric or [histogram fields](/reference/elasticsearch/mapping-reference/histogram.md) in the documents. + +Percentiles show the point at which a certain percentage of observed values occur. For example, the 95th percentile is the value which is greater than 95% of the observed values. + +Percentiles are often used to find outliers. In normal distributions, the 0.13th and 99.87th percentiles represents three standard deviations from the mean. Any data which falls outside three standard deviations is often considered an anomaly. + +When a range of percentiles are retrieved, they can be used to estimate the data distribution and determine if the data is skewed, bimodal, etc. + +Assume your data consists of website load times. The average and median load times are not overly useful to an administrator. The max may be interesting, but it can be easily skewed by a single slow response. + +Let’s look at a range of percentiles representing load time: + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_outlier": { + "percentiles": { + "field": "load_time" <1> + } + } + } +} +``` + +1. The field `load_time` must be a numeric field + + +By default, the `percentile` metric will generate a range of percentiles: `[ 1, 5, 25, 50, 75, 95, 99 ]`. The response will look like this: + +```console-result +{ + ... + + "aggregations": { + "load_time_outlier": { + "values": { + "1.0": 10.0, + "5.0": 30.0, + "25.0": 170.0, + "50.0": 445.0, + "75.0": 720.0, + "95.0": 940.0, + "99.0": 980.0 + } + } + } +} +``` + +As you can see, the aggregation will return a calculated value for each percentile in the default range. 
If we assume response times are in milliseconds, it is immediately obvious that the webpage normally loads in 10-720ms, but occasionally spikes to 940-980ms. + +Often, administrators are only interested in outliers — the extreme percentiles. We can specify just the percents we are interested in (requested percentiles must be a value between 0-100 inclusive): + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_outlier": { + "percentiles": { + "field": "load_time", + "percents": [ 95, 99, 99.9 ] <1> + } + } + } +} +``` + +1. Use the `percents` parameter to specify particular percentiles to calculate + + +## Keyed Response [_keyed_response_6] + +By default the `keyed` flag is set to `true` which associates a unique string key with each bucket and returns the ranges as a hash rather than an array. Setting the `keyed` flag to `false` will disable this behavior: + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_outlier": { + "percentiles": { + "field": "load_time", + "keyed": false + } + } + } +} +``` + +Response: + +```console-result +{ + ... + + "aggregations": { + "load_time_outlier": { + "values": [ + { + "key": 1.0, + "value": 10.0 + }, + { + "key": 5.0, + "value": 30.0 + }, + { + "key": 25.0, + "value": 170.0 + }, + { + "key": 50.0, + "value": 445.0 + }, + { + "key": 75.0, + "value": 720.0 + }, + { + "key": 95.0, + "value": 940.0 + }, + { + "key": 99.0, + "value": 980.0 + } + ] + } + } +} +``` + + +## Script [_script_10] + +If you need to run the aggregation against values that aren’t indexed, use a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). For example, if our load times are in milliseconds but you want percentiles calculated in seconds: + +```console +GET latency/_search +{ + "size": 0, + "runtime_mappings": { + "load_time.seconds": { + "type": "long", + "script": { + "source": "emit(doc['load_time'].value / params.timeUnit)", + "params": { + "timeUnit": 1000 + } + } + } + }, + "aggs": { + "load_time_outlier": { + "percentiles": { + "field": "load_time.seconds" + } + } + } +} +``` + + +## Percentiles are (usually) approximate [search-aggregations-metrics-percentile-aggregation-approximation] + +There are many different algorithms to calculate percentiles. The naive implementation simply stores all the values in a sorted array. To find the 50th percentile, you simply find the value that is at `my_array[count(my_array) * 0.5]`. + +Clearly, the naive implementation does not scale — the sorted array grows linearly with the number of values in your dataset. To calculate percentiles across potentially billions of values in an Elasticsearch cluster, *approximate* percentiles are calculated. + +The algorithm used by the `percentile` metric is called TDigest (introduced by Ted Dunning in [Computing Accurate Quantiles using T-Digests](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf)). + +When using this metric, there are a few guidelines to keep in mind: + +* Accuracy is proportional to `q(1-q)`. This means that extreme percentiles (e.g. 99%) are more accurate than less extreme percentiles, such as the median +* For small sets of values, percentiles are highly accurate (and potentially 100% accurate if the data is small enough). +* As the quantity of values in a bucket grows, the algorithm begins to approximate the percentiles. It is effectively trading accuracy for memory savings. 
The exact level of inaccuracy is difficult to generalize, since it depends on your data distribution and volume of data being aggregated + +The following chart shows the relative error on a uniform distribution depending on the number of collected values and the requested percentile: + +![percentiles error](../../../images/percentiles_error.png "") + +It shows how precision is better for extreme percentiles. The reason why error diminishes for large number of values is that the law of large numbers makes the distribution of values more and more uniform and the t-digest tree can do a better job at summarizing it. It would not be the case on more skewed distributions. + +::::{warning} +Percentile aggregations are also [non-deterministic](https://en.wikipedia.org/wiki/Nondeterministic_algorithm). This means you can get slightly different results using the same data. + +:::: + + + +## Compression [search-aggregations-metrics-percentile-aggregation-compression] + +Approximate algorithms must balance memory utilization with estimation accuracy. This balance can be controlled using a `compression` parameter: + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_outlier": { + "percentiles": { + "field": "load_time", + "tdigest": { + "compression": 200 <1> + } + } + } + } +} +``` + +1. Compression controls memory usage and approximation error + + +The TDigest algorithm uses a number of "nodes" to approximate percentiles — the more nodes available, the higher the accuracy (and large memory footprint) proportional to the volume of data. The `compression` parameter limits the maximum number of nodes to `20 * compression`. + +Therefore, by increasing the compression value, you can increase the accuracy of your percentiles at the cost of more memory. Larger compression values also make the algorithm slower since the underlying tree data structure grows in size, resulting in more expensive operations. The default compression value is `100`. + +A "node" uses roughly 32 bytes of memory, so under worst-case scenarios (large amount of data which arrives sorted and in-order) the default settings will produce a TDigest roughly 64KB in size. In practice data tends to be more random and the TDigest will use less memory. + + +## Execution hint [search-aggregations-metrics-percentile-aggregation-execution-hint] + +The default implementation of TDigest is optimized for performance, scaling to millions or even billions of sample values while maintaining acceptable accuracy levels (close to 1% relative error for millions of samples in some cases). There’s an option to use an implementation optimized for accuracy by setting parameter `execution_hint` to value `high_accuracy`: + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_outlier": { + "percentiles": { + "field": "load_time", + "tdigest": { + "execution_hint": "high_accuracy" <1> + } + } + } + } +} +``` + +1. Optimize TDigest for accuracy, at the expense of performance + + +This option can lead to improved accuracy (relative error close to 0.01% for millions of samples in some cases) but then percentile queries take 2x-10x longer to complete. + + +## HDR histogram [_hdr_histogram_2] + +[HDR Histogram](https://github.com/HdrHistogram/HdrHistogram) (High Dynamic Range Histogram) is an alternative implementation that can be useful when calculating percentiles for latency measurements as it can be faster than the t-digest implementation with the trade-off of a larger memory footprint. 
This implementation maintains a fixed worse-case percentage error (specified as a number of significant digits). This means that if data is recorded with values from 1 microsecond up to 1 hour (3,600,000,000 microseconds) in a histogram set to 3 significant digits, it will maintain a value resolution of 1 microsecond for values up to 1 millisecond and 3.6 seconds (or better) for the maximum tracked value (1 hour). + +The HDR Histogram can be used by specifying the `hdr` parameter in the request: + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_outlier": { + "percentiles": { + "field": "load_time", + "percents": [ 95, 99, 99.9 ], + "hdr": { <1> + "number_of_significant_value_digits": 3 <2> + } + } + } + } +} +``` + +1. `hdr` object indicates that HDR Histogram should be used to calculate the percentiles and specific settings for this algorithm can be specified inside the object +2. `number_of_significant_value_digits` specifies the resolution of values for the histogram in number of significant digits + + +The HDRHistogram only supports positive values and will error if it is passed a negative value. It is also not a good idea to use the HDRHistogram if the range of values is unknown as this could lead to high memory usage. + + +## Missing value [_missing_value_14] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "grade_percentiles": { + "percentiles": { + "field": "grade", + "missing": 10 <1> + } + } + } +} +``` + +1. Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-rank-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-rank-aggregation.md new file mode 100644 index 0000000000000..83b8897875b01 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-rank-aggregation.md @@ -0,0 +1,190 @@ +--- +navigation_title: "Percentile ranks" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-rank-aggregation.html +--- + +# Percentile ranks aggregation [search-aggregations-metrics-percentile-rank-aggregation] + + +A `multi-value` metrics aggregation that calculates one or more percentile ranks over numeric values extracted from the aggregated documents. These values can be extracted from specific numeric or [histogram fields](/reference/elasticsearch/mapping-reference/histogram.md) in the documents. 
+ +::::{note} +Please see [Percentiles are (usually) approximate](/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md#search-aggregations-metrics-percentile-aggregation-approximation), [Compression](/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md#search-aggregations-metrics-percentile-aggregation-compression) and [Execution hint](/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md#search-aggregations-metrics-percentile-aggregation-execution-hint) for advice regarding approximation, performance and memory use of the percentile ranks aggregation + +:::: + + +Percentile rank show the percentage of observed values which are below certain value. For example, if a value is greater than or equal to 95% of the observed values it is said to be at the 95th percentile rank. + +Assume your data consists of website load times. You may have a service agreement that 95% of page loads complete within 500ms and 99% of page loads complete within 600ms. + +Let’s look at a range of percentiles representing load time: + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_ranks": { + "percentile_ranks": { + "field": "load_time", <1> + "values": [ 500, 600 ] + } + } + } +} +``` + +1. The field `load_time` must be a numeric field + + +The response will look like this: + +```console-result +{ + ... + + "aggregations": { + "load_time_ranks": { + "values": { + "500.0": 55.0, + "600.0": 64.0 + } + } + } +} +``` + +From this information you can determine you are hitting the 99% load time target but not quite hitting the 95% load time target + +## Keyed Response [_keyed_response_5] + +By default the `keyed` flag is set to `true` associates a unique string key with each bucket and returns the ranges as a hash rather than an array. Setting the `keyed` flag to `false` will disable this behavior: + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_ranks": { + "percentile_ranks": { + "field": "load_time", + "values": [ 500, 600 ], + "keyed": false + } + } + } +} +``` + +Response: + +```console-result +{ + ... + + "aggregations": { + "load_time_ranks": { + "values": [ + { + "key": 500.0, + "value": 55.0 + }, + { + "key": 600.0, + "value": 64.0 + } + ] + } + } +} +``` + + +## Script [_script_9] + +If you need to run the aggregation against values that aren’t indexed, use a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). For example, if our load times are in milliseconds but we want percentiles calculated in seconds: + +```console +GET latency/_search +{ + "size": 0, + "runtime_mappings": { + "load_time.seconds": { + "type": "long", + "script": { + "source": "emit(doc['load_time'].value / params.timeUnit)", + "params": { + "timeUnit": 1000 + } + } + } + }, + "aggs": { + "load_time_ranks": { + "percentile_ranks": { + "values": [ 500, 600 ], + "field": "load_time.seconds" + } + } + } +} +``` + + +## HDR Histogram [_hdr_histogram] + +[HDR Histogram](https://github.com/HdrHistogram/HdrHistogram) (High Dynamic Range Histogram) is an alternative implementation that can be useful when calculating percentile ranks for latency measurements as it can be faster than the t-digest implementation with the trade-off of a larger memory footprint. This implementation maintains a fixed worse-case percentage error (specified as a number of significant digits). 
This means that if data is recorded with values from 1 microsecond up to 1 hour (3,600,000,000 microseconds) in a histogram set to 3 significant digits, it will maintain a value resolution of 1 microsecond for values up to 1 millisecond and 3.6 seconds (or better) for the maximum tracked value (1 hour). + +The HDR Histogram can be used by specifying the `hdr` object in the request: + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_ranks": { + "percentile_ranks": { + "field": "load_time", + "values": [ 500, 600 ], + "hdr": { <1> + "number_of_significant_value_digits": 3 <2> + } + } + } + } +} +``` + +1. `hdr` object indicates that HDR Histogram should be used to calculate the percentiles and specific settings for this algorithm can be specified inside the object +2. `number_of_significant_value_digits` specifies the resolution of values for the histogram in number of significant digits + + +The HDRHistogram only supports positive values and will error if it is passed a negative value. It is also not a good idea to use the HDRHistogram if the range of values is unknown as this could lead to high memory usage. + + +## Missing value [_missing_value_13] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +```console +GET latency/_search +{ + "size": 0, + "aggs": { + "load_time_ranks": { + "percentile_ranks": { + "field": "load_time", + "values": [ 500, 600 ], + "missing": 10 <1> + } + } + } +} +``` + +1. Documents without a value in the `load_time` field will fall into the same bucket as documents that have the value `10`. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-rate-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-rate-aggregation.md new file mode 100644 index 0000000000000..15479da75465e --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-rate-aggregation.md @@ -0,0 +1,480 @@ +--- +navigation_title: "Rate" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-rate-aggregation.html +--- + +# Rate aggregation [search-aggregations-metrics-rate-aggregation] + + +A `rate` metrics aggregation can be used only inside a `date_histogram` or `composite` aggregation. It calculates a rate of documents or a field in each bucket. The field values can be extracted from specific numeric or [histogram fields](/reference/elasticsearch/mapping-reference/histogram.md) in the documents. + +::::{note} +For `composite` aggregations, there must be exactly one `date_histogram` source for the `rate` aggregation to be supported. +:::: + + +## Syntax [_syntax_5] + +A `rate` aggregation looks like this in isolation: + +```js +{ + "rate": { + "unit": "month", + "field": "requests" + } +} +``` + +The following request will group all sales records into monthly buckets and then convert the number of sales transactions in each bucket into per annual sales rate. + +```console +GET sales/_search +{ + "size": 0, + "aggs": { + "by_date": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" <1> + }, + "aggs": { + "my_rate": { + "rate": { + "unit": "year" <2> + } + } + } + } + } +} +``` + +1. Histogram is grouped by month. +2. But the rate is converted into annual rate. + + +The response will return the annual rate of transactions in each bucket. 
Since there are 12 months per year, the annual rate will be automatically calculated by multiplying the monthly rate by 12. + +```console-result +{ + ... + "aggregations" : { + "by_date" : { + "buckets" : [ + { + "key_as_string" : "2015/01/01 00:00:00", + "key" : 1420070400000, + "doc_count" : 3, + "my_rate" : { + "value" : 36.0 + } + }, + { + "key_as_string" : "2015/02/01 00:00:00", + "key" : 1422748800000, + "doc_count" : 2, + "my_rate" : { + "value" : 24.0 + } + }, + { + "key_as_string" : "2015/03/01 00:00:00", + "key" : 1425168000000, + "doc_count" : 2, + "my_rate" : { + "value" : 24.0 + } + } + ] + } + } +} +``` + +Instead of counting the number of documents, it is also possible to calculate a sum of all values of the fields in the documents in each bucket or the number of values in each bucket. The following request will group all sales records into monthly bucket and than calculate the total monthly sales and convert them into average daily sales. + +```console +GET sales/_search +{ + "size": 0, + "aggs": { + "by_date": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" <1> + }, + "aggs": { + "avg_price": { + "rate": { + "field": "price", <2> + "unit": "day" <3> + } + } + } + } + } +} +``` + +1. Histogram is grouped by month. +2. Calculate sum of all sale prices +3. Convert to average daily sales + + +The response will contain the average daily sale prices for each month. + +```console-result +{ + ... + "aggregations" : { + "by_date" : { + "buckets" : [ + { + "key_as_string" : "2015/01/01 00:00:00", + "key" : 1420070400000, + "doc_count" : 3, + "avg_price" : { + "value" : 17.741935483870968 + } + }, + { + "key_as_string" : "2015/02/01 00:00:00", + "key" : 1422748800000, + "doc_count" : 2, + "avg_price" : { + "value" : 2.142857142857143 + } + }, + { + "key_as_string" : "2015/03/01 00:00:00", + "key" : 1425168000000, + "doc_count" : 2, + "avg_price" : { + "value" : 12.096774193548388 + } + } + ] + } + } +} +``` + +You can also take advantage of `composite` aggregations to calculate the average daily sale price for each item in your inventory + +```console +GET sales/_search?filter_path=aggregations&size=0 +{ + "aggs": { + "buckets": { + "composite": { <1> + "sources": [ + { + "month": { + "date_histogram": { <2> + "field": "date", + "calendar_interval": "month" + } + } + }, + { + "type": { <3> + "terms": { + "field": "type" + } + } + } + ] + }, + "aggs": { + "avg_price": { + "rate": { + "field": "price", <4> + "unit": "day" <5> + } + } + } + } + } +} +``` + +1. Composite aggregation with a date histogram source and a source for the item type. +2. The date histogram source grouping monthly +3. The terms source grouping for each sale item type +4. Calculate sum of all sale prices, per month and item +5. Convert to average daily sales per item + + +The response will contain the average daily sale prices for each month per item. 
+ +```console-result +{ + "aggregations" : { + "buckets" : { + "after_key" : { + "month" : 1425168000000, + "type" : "t-shirt" + }, + "buckets" : [ + { + "key" : { + "month" : 1420070400000, + "type" : "bag" + }, + "doc_count" : 1, + "avg_price" : { + "value" : 4.838709677419355 + } + }, + { + "key" : { + "month" : 1420070400000, + "type" : "hat" + }, + "doc_count" : 1, + "avg_price" : { + "value" : 6.451612903225806 + } + }, + { + "key" : { + "month" : 1420070400000, + "type" : "t-shirt" + }, + "doc_count" : 1, + "avg_price" : { + "value" : 6.451612903225806 + } + }, + { + "key" : { + "month" : 1422748800000, + "type" : "hat" + }, + "doc_count" : 1, + "avg_price" : { + "value" : 1.7857142857142858 + } + }, + { + "key" : { + "month" : 1422748800000, + "type" : "t-shirt" + }, + "doc_count" : 1, + "avg_price" : { + "value" : 0.35714285714285715 + } + }, + { + "key" : { + "month" : 1425168000000, + "type" : "hat" + }, + "doc_count" : 1, + "avg_price" : { + "value" : 6.451612903225806 + } + }, + { + "key" : { + "month" : 1425168000000, + "type" : "t-shirt" + }, + "doc_count" : 1, + "avg_price" : { + "value" : 5.645161290322581 + } + } + ] + } + } +} +``` + +By adding the `mode` parameter with the value `value_count`, we can change the calculation from `sum` to the number of values of the field: + +```console +GET sales/_search +{ + "size": 0, + "aggs": { + "by_date": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" <1> + }, + "aggs": { + "avg_number_of_sales_per_year": { + "rate": { + "field": "price", <2> + "unit": "year", <3> + "mode": "value_count" <4> + } + } + } + } + } +} +``` + +1. Histogram is grouped by month. +2. Calculate number of all sale prices +3. Convert to annual counts +4. Changing the mode to value count + + +The response will contain the average daily sale prices for each month. + +```console-result +{ + ... + "aggregations" : { + "by_date" : { + "buckets" : [ + { + "key_as_string" : "2015/01/01 00:00:00", + "key" : 1420070400000, + "doc_count" : 3, + "avg_number_of_sales_per_year" : { + "value" : 36.0 + } + }, + { + "key_as_string" : "2015/02/01 00:00:00", + "key" : 1422748800000, + "doc_count" : 2, + "avg_number_of_sales_per_year" : { + "value" : 24.0 + } + }, + { + "key_as_string" : "2015/03/01 00:00:00", + "key" : 1425168000000, + "doc_count" : 2, + "avg_number_of_sales_per_year" : { + "value" : 24.0 + } + } + ] + } + } +} +``` + +By default `sum` mode is used. + +`"mode": "sum"` +: calculate the sum of all values field + +`"mode": "value_count"` +: use the number of values in the field + + +## Relationship between bucket sizes and rate [_relationship_between_bucket_sizes_and_rate] + +The `rate` aggregation supports all rate that can be used [calendar_intervals parameter](/reference/data-analysis/aggregations/search-aggregations-bucket-datehistogram-aggregation.md#calendar_intervals) of `date_histogram` aggregation. The specified rate should compatible with the `date_histogram` aggregation interval, i.e. it should be possible to convert the bucket size into the rate. By default the interval of the `date_histogram` is used. 
+ +`"rate": "second"` +: compatible with all intervals + +`"rate": "minute"` +: compatible with all intervals + +`"rate": "hour"` +: compatible with all intervals + +`"rate": "day"` +: compatible with all intervals + +`"rate": "week"` +: compatible with all intervals + +`"rate": "month"` +: compatible with only with `month`, `quarter` and `year` calendar intervals + +`"rate": "quarter"` +: compatible with only with `month`, `quarter` and `year` calendar intervals + +`"rate": "year"` +: compatible with only with `month`, `quarter` and `year` calendar intervals + +There is also an additional limitations if the date histogram is not a direct parent of the rate histogram. In this case both rate interval and histogram interval have to be in the same group: [`second`, ` minute`, `hour`, `day`, `week`] or [`month`, `quarter`, `year`]. For example, if the date histogram is `month` based, only rate intervals of `month`, `quarter` or `year` are supported. If the date histogram is `day` based, only `second`, ` minute`, `hour`, `day`, and `week` rate intervals are supported. + + +## Script [_script_11] + +If you need to run the aggregation against values that aren’t indexed, run the aggregation on a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). For example, if we need to adjust our prices before calculating rates: + +```console +GET sales/_search +{ + "size": 0, + "runtime_mappings": { + "price.adjusted": { + "type": "double", + "script": { + "source": "emit(doc['price'].value * params.adjustment)", + "params": { + "adjustment": 0.9 + } + } + } + }, + "aggs": { + "by_date": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "avg_price": { + "rate": { + "field": "price.adjusted" + } + } + } + } + } +} +``` + +```console-result +{ + ... + "aggregations" : { + "by_date" : { + "buckets" : [ + { + "key_as_string" : "2015/01/01 00:00:00", + "key" : 1420070400000, + "doc_count" : 3, + "avg_price" : { + "value" : 495.0 + } + }, + { + "key_as_string" : "2015/02/01 00:00:00", + "key" : 1422748800000, + "doc_count" : 2, + "avg_price" : { + "value" : 54.0 + } + }, + { + "key_as_string" : "2015/03/01 00:00:00", + "key" : 1425168000000, + "doc_count" : 2, + "avg_price" : { + "value" : 337.5 + } + } + ] + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-scripted-metric-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-scripted-metric-aggregation.md new file mode 100644 index 0000000000000..9defeda72a325 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-scripted-metric-aggregation.md @@ -0,0 +1,257 @@ +--- +navigation_title: "Scripted metric" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-scripted-metric-aggregation.html +--- + +# Scripted metric aggregation [search-aggregations-metrics-scripted-metric-aggregation] + + +A metric aggregation that executes using scripts to provide a metric output. + +::::{warning} +`scripted_metric` is not available in {{serverless-full}}. +:::: + + +::::{warning} +Using scripts can result in slower search speeds. See [Scripts, caching, and search speed](docs-content://explore-analyze/scripting/scripts-search-speed.md). When using a scripted metric aggregation, its intermediate state is serialized into an in-memory byte array for transmission to other nodes during the aggregation process. 
Consequently, a complex scripted metric aggregation may also encounter the 2GB limitation imposed on Java arrays. +:::: + + +Example: + +```console +POST ledger/_search?size=0 +{ + "query": { + "match_all": {} + }, + "aggs": { + "profit": { + "scripted_metric": { + "init_script": "state.transactions = []", <1> + "map_script": "state.transactions.add(doc.type.value == 'sale' ? doc.amount.value : -1 * doc.amount.value)", + "combine_script": "double profit = 0; for (t in state.transactions) { profit += t } return profit", + "reduce_script": "double profit = 0; for (a in states) { profit += a } return profit" + } + } + } +} +``` + +1. `init_script` is an optional parameter, all other scripts are required. + + +The above aggregation demonstrates how one would use the script aggregation compute the total profit from sale and cost transactions. + +The response for the above aggregation: + +```console-result +{ + "took": 218, + ... + "aggregations": { + "profit": { + "value": 240.0 + } + } +} +``` + +The above example can also be specified using stored scripts as follows: + +```console +POST ledger/_search?size=0 +{ + "aggs": { + "profit": { + "scripted_metric": { + "init_script": { + "id": "my_init_script" + }, + "map_script": { + "id": "my_map_script" + }, + "combine_script": { + "id": "my_combine_script" + }, + "params": { + "field": "amount" <1> + }, + "reduce_script": { + "id": "my_reduce_script" + } + } + } + } +} +``` + +1. script parameters for `init`, `map` and `combine` scripts must be specified in a global `params` object so that it can be shared between the scripts. + + +For more details on specifying scripts see [script documentation](docs-content://explore-analyze/scripting.md). + +## Allowed return types [scripted-metric-aggregation-return-types] + +Whilst any valid script object can be used within a single script, the scripts must return or store in the `state` object only the following types: + +* primitive types +* String +* Map (containing only keys and values of the types listed here) +* Array (containing elements of only the types listed here) + + +## Scope of scripts [scripted-metric-aggregation-scope] + +The scripted metric aggregation uses scripts at 4 stages of its execution: + +init_script +: Executed prior to any collection of documents. Allows the aggregation to set up any initial state. + + In the above example, the `init_script` creates an array `transactions` in the `state` object. + + +map_script +: Executed once per document collected. This is a required script. + + In the above example, the `map_script` checks the value of the type field. If the value is *sale* the value of the amount field is added to the transactions array. If the value of the type field is not *sale* the negated value of the amount field is added to transactions. + + +combine_script +: Executed once on each shard after document collection is complete. This is a required script. Allows the aggregation to consolidate the state returned from each shard. + + In the above example, the `combine_script` iterates through all the stored transactions, summing the values in the `profit` variable and finally returns `profit`. + + +reduce_script +: Executed once on the coordinating node after all shards have returned their results. This is a required script. The script is provided with access to a variable `states` which is an array of the result of the combine_script on each shard. 
+ + In the above example, the `reduce_script` iterates through the `profit` returned by each shard summing the values before returning the final combined profit which will be returned in the response of the aggregation. + + + +## Worked example [scripted-metric-aggregation-example] + +Imagine a situation where you index the following documents into an index with 2 shards: + +```console +PUT /transactions/_bulk?refresh +{"index":{"_id":1}} +{"type": "sale","amount": 80} +{"index":{"_id":2}} +{"type": "cost","amount": 10} +{"index":{"_id":3}} +{"type": "cost","amount": 30} +{"index":{"_id":4}} +{"type": "sale","amount": 130} +``` + +Lets say that documents 1 and 3 end up on shard A and documents 2 and 4 end up on shard B. The following is a breakdown of what the aggregation result is at each stage of the example above. + +### Before init_script [_before_init_script] + +`state` is initialized as a new empty object. + +```js +"state" : {} +``` + + +### After init_script [_after_init_script] + +This is run once on each shard before any document collection is performed, and so we will have a copy on each shard: + +Shard A +: ```js +"state" : { + "transactions" : [] +} +``` + + +Shard B +: ```js +"state" : { + "transactions" : [] +} +``` + + + +### After map_script [_after_map_script] + +Each shard collects its documents and runs the map_script on each document that is collected: + +Shard A +: ```js +"state" : { + "transactions" : [ 80, -30 ] +} +``` + + +Shard B +: ```js +"state" : { + "transactions" : [ -10, 130 ] +} +``` + + + +### After combine_script [_after_combine_script] + +The combine_script is executed on each shard after document collection is complete and reduces all the transactions down to a single profit figure for each shard (by summing the values in the transactions array) which is passed back to the coordinating node: + +Shard A +: 50 + +Shard B +: 120 + + +### After reduce_script [_after_reduce_script] + +The reduce_script receives a `states` array containing the result of the combine script for each shard: + +```js +"states" : [ + 50, + 120 +] +``` + +It reduces the responses for the shards down to a final overall profit figure (by summing the values) and returns this as the result of the aggregation to produce the response: + +```js +{ + ... + + "aggregations": { + "profit": { + "value": 170 + } + } +} +``` + + + +## Other parameters [scripted-metric-aggregation-parameters] + +params +: Optional. An object whose contents will be passed as variables to the `init_script`, `map_script` and `combine_script`. This can be useful to allow the user to control the behavior of the aggregation and for storing state between the scripts. If this is not specified, the default is the equivalent of providing: + + ```js + "params" : {} + ``` + + + +## Empty buckets [scripted-metric-aggregation-empty-buckets] + +If a parent bucket of the scripted metric aggregation does not collect any documents an empty aggregation response will be returned from the shard with a `null` value. In this case the `reduce_script`'s `states` variable will contain `null` as a response from that shard. `reduce_script`'s should therefore expect and deal with `null` responses from shards. 
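+
+For instance, a minimal sketch of a `null`-tolerant `reduce_script` (reusing the profit example above; illustrative rather than prescriptive) could simply skip shards that returned no state:
+
+```js
+"reduce_script": "double profit = 0; for (a in states) { if (a != null) { profit += a } } return profit"
+```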
+ + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-stats-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-stats-aggregation.md new file mode 100644 index 0000000000000..1fe99e261d240 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-stats-aggregation.md @@ -0,0 +1,93 @@ +--- +navigation_title: "Stats" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-stats-aggregation.html +--- + +# Stats aggregation [search-aggregations-metrics-stats-aggregation] + + +A `multi-value` metrics aggregation that computes stats over numeric values extracted from the aggregated documents. + +The stats that are returned consist of: `min`, `max`, `sum`, `count` and `avg`. + +Assuming the data consists of documents representing exams grades (between 0 and 100) of students + +```console +POST /exams/_search?size=0 +{ + "aggs": { + "grades_stats": { "stats": { "field": "grade" } } + } +} +``` + +The above aggregation computes the grades statistics over all documents. The aggregation type is `stats` and the `field` setting defines the numeric field of the documents the stats will be computed on. The above will return the following: + +```console-result +{ + ... + + "aggregations": { + "grades_stats": { + "count": 2, + "min": 50.0, + "max": 100.0, + "avg": 75.0, + "sum": 150.0 + } + } +} +``` + +The name of the aggregation (`grades_stats` above) also serves as the key by which the aggregation result can be retrieved from the returned response. + +## Script [_script_12] + +If you need to get the `stats` for something more complex than a single field, run the aggregation on a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). + +```console +POST /exams/_search +{ + "size": 0, + "runtime_mappings": { + "grade.weighted": { + "type": "double", + "script": """ + emit(doc['grade'].value * doc['weight'].value) + """ + } + }, + "aggs": { + "grades_stats": { + "stats": { + "field": "grade.weighted" + } + } + } +} +``` + + +## Missing value [_missing_value_15] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +```console +POST /exams/_search?size=0 +{ + "aggs": { + "grades_stats": { + "stats": { + "field": "grade", + "missing": 0 <1> + } + } + } +} +``` + +1. Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `0`. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-string-stats-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-string-stats-aggregation.md new file mode 100644 index 0000000000000..04d37af374c00 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-string-stats-aggregation.md @@ -0,0 +1,162 @@ +--- +navigation_title: "String stats" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-string-stats-aggregation.html +--- + +# String stats aggregation [search-aggregations-metrics-string-stats-aggregation] + + +A `multi-value` metrics aggregation that computes statistics over string values extracted from the aggregated documents. These values can be retrieved either from specific `keyword` fields. 
+ +The string stats aggregation returns the following results: + +* `count` - The number of non-empty fields counted. +* `min_length` - The length of the shortest term. +* `max_length` - The length of the longest term. +* `avg_length` - The average length computed over all terms. +* `entropy` - The [Shannon Entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)) value computed over all terms collected by the aggregation. Shannon entropy quantifies the amount of information contained in the field. It is a very useful metric for measuring a wide range of properties of a data set, such as diversity, similarity, randomness etc. + +For example: + +```console +POST /my-index-000001/_search?size=0 +{ + "aggs": { + "message_stats": { "string_stats": { "field": "message.keyword" } } + } +} +``` + +The above aggregation computes the string statistics for the `message` field in all documents. The aggregation type is `string_stats` and the `field` parameter defines the field of the documents the stats will be computed on. The above will return the following: + +```console-result +{ + ... + + "aggregations": { + "message_stats": { + "count": 5, + "min_length": 24, + "max_length": 30, + "avg_length": 28.8, + "entropy": 3.94617750050791 + } + } +} +``` + +The name of the aggregation (`message_stats` above) also serves as the key by which the aggregation result can be retrieved from the returned response. + +## Character distribution [_character_distribution] + +The computation of the Shannon Entropy value is based on the probability of each character appearing in all terms collected by the aggregation. To view the probability distribution for all characters, we can add the `show_distribution` (default: `false`) parameter. + +```console +POST /my-index-000001/_search?size=0 +{ + "aggs": { + "message_stats": { + "string_stats": { + "field": "message.keyword", + "show_distribution": true <1> + } + } + } +} +``` + +1. Set the `show_distribution` parameter to `true`, so that probability distribution for all characters is returned in the results. + + +```console-result +{ + ... + + "aggregations": { + "message_stats": { + "count": 5, + "min_length": 24, + "max_length": 30, + "avg_length": 28.8, + "entropy": 3.94617750050791, + "distribution": { + " ": 0.1527777777777778, + "e": 0.14583333333333334, + "s": 0.09722222222222222, + "m": 0.08333333333333333, + "t": 0.0763888888888889, + "h": 0.0625, + "a": 0.041666666666666664, + "i": 0.041666666666666664, + "r": 0.041666666666666664, + "g": 0.034722222222222224, + "n": 0.034722222222222224, + "o": 0.034722222222222224, + "u": 0.034722222222222224, + "b": 0.027777777777777776, + "w": 0.027777777777777776, + "c": 0.013888888888888888, + "E": 0.006944444444444444, + "l": 0.006944444444444444, + "1": 0.006944444444444444, + "2": 0.006944444444444444, + "3": 0.006944444444444444, + "4": 0.006944444444444444, + "y": 0.006944444444444444 + } + } + } +} +``` + +The `distribution` object shows the probability of each character appearing in all terms. The characters are sorted by descending probability. + + +## Script [_script_13] + +If you need to get the `string_stats` for something more complex than a single field, run the aggregation on a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). 
+ +```console +POST /my-index-000001/_search +{ + "size": 0, + "runtime_mappings": { + "message_and_context": { + "type": "keyword", + "script": """ + emit(doc['message.keyword'].value + ' ' + doc['context.keyword'].value) + """ + } + }, + "aggs": { + "message_stats": { + "string_stats": { "field": "message_and_context" } + } + } +} +``` + + +## Missing value [_missing_value_16] + +The `missing` parameter defines how documents that are missing a value should be treated. By default they will be ignored but it is also possible to treat them as if they had a value. + +```console +POST /my-index-000001/_search?size=0 +{ + "aggs": { + "message_stats": { + "string_stats": { + "field": "message.keyword", + "missing": "[empty message]" <1> + } + } + } +} +``` + +1. Documents without a value in the `message` field will be treated as documents that have the value `[empty message]`. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-sum-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-sum-aggregation.md new file mode 100644 index 0000000000000..cf97a4697b330 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-sum-aggregation.md @@ -0,0 +1,164 @@ +--- +navigation_title: "Sum" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-sum-aggregation.html +--- + +# Sum aggregation [search-aggregations-metrics-sum-aggregation] + + +A `single-value` metrics aggregation that sums up numeric values that are extracted from the aggregated documents. These values can be extracted either from specific numeric or [histogram](/reference/elasticsearch/mapping-reference/histogram.md) fields. + +Assuming the data consists of documents representing sales records we can sum the sale price of all hats with: + +```console +POST /sales/_search?size=0 +{ + "query": { + "constant_score": { + "filter": { + "match": { "type": "hat" } + } + } + }, + "aggs": { + "hat_prices": { "sum": { "field": "price" } } + } +} +``` + +Resulting in: + +```console-result +{ + ... + "aggregations": { + "hat_prices": { + "value": 450.0 + } + } +} +``` + +The name of the aggregation (`hat_prices` above) also serves as the key by which the aggregation result can be retrieved from the returned response. + +## Script [_script_14] + +If you need to get the `sum` for something more complex than a single field, run the aggregation on a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). + +```console +POST /sales/_search?size=0 +{ + "runtime_mappings": { + "price.weighted": { + "type": "double", + "script": """ + double price = doc['price'].value; + if (doc['promoted'].value) { + price *= 0.8; + } + emit(price); + """ + } + }, + "query": { + "constant_score": { + "filter": { + "match": { "type": "hat" } + } + } + }, + "aggs": { + "hat_prices": { + "sum": { + "field": "price.weighted" + } + } + } +} +``` + + +## Missing value [_missing_value_17] + +The `missing` parameter defines how documents that are missing a value should be treated. By default documents missing the value will be ignored but it is also possible to treat them as if they had a value. For example, this treats all hat sales without a price as being `100`. 
+ +```console +POST /sales/_search?size=0 +{ + "query": { + "constant_score": { + "filter": { + "match": { "type": "hat" } + } + } + }, + "aggs": { + "hat_prices": { + "sum": { + "field": "price", + "missing": 100 + } + } + } +} +``` + + +## Histogram fields [search-aggregations-metrics-sum-aggregation-histogram-fields] + +When sum is computed on [histogram fields](/reference/elasticsearch/mapping-reference/histogram.md), the result of the aggregation is the sum of all elements in the `values` array multiplied by the number in the same position in the `counts` array. + +For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks: + +```console +PUT metrics_index +{ + "mappings": { + "properties": { + "latency_histo": { "type": "histogram" } + } + } +} + +PUT metrics_index/_doc/1?refresh +{ + "network.name" : "net-1", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], + "counts" : [3, 7, 23, 12, 6] + } +} + +PUT metrics_index/_doc/2?refresh +{ + "network.name" : "net-2", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], + "counts" : [8, 17, 8, 7, 6] + } +} + +POST /metrics_index/_search?size=0&filter_path=aggregations +{ + "aggs" : { + "total_latency" : { "sum" : { "field" : "latency_histo" } } + } +} +``` + +For each histogram field, the `sum` aggregation will add each number in the `values` array, multiplied by its associated count in the `counts` array. + +Eventually, it will add all values for all histograms and return the following result: + +```console-result +{ + "aggregations": { + "total_latency": { + "value": 28.8 + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-top-hits-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-top-hits-aggregation.md new file mode 100644 index 0000000000000..57ed6f15bb52b --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-top-hits-aggregation.md @@ -0,0 +1,435 @@ +--- +navigation_title: "Top hits" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-top-hits-aggregation.html +--- + +# Top hits aggregation [search-aggregations-metrics-top-hits-aggregation] + + +A `top_hits` metric aggregator keeps track of the most relevant document being aggregated. This aggregator is intended to be used as a sub aggregator, so that the top matching documents can be aggregated per bucket. + +::::{tip} +We do not recommend using `top_hits` as a top-level aggregation. If you want to group search hits, use the [`collapse`](/reference/elasticsearch/rest-apis/collapse-search-results.md) parameter instead. +:::: + + +The `top_hits` aggregator can effectively be used to group result sets by certain fields via a bucket aggregator. One or more bucket aggregators determines by which properties a result set get sliced into. + +## Options [_options_6] + +* `from` - The offset from the first result you want to fetch. +* `size` - The maximum number of top matching hits to return per bucket. By default the top three matching hits are returned. +* `sort` - How the top matching hits should be sorted. By default the hits are sorted by the score of the main query. 
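+
+For instance, these options can be combined in a single request. The following is a minimal sketch: it reuses the `sales` index and the `type` and `date` fields from the example further down this page, and the aggregation names are invented for illustration.
+
+```console
+POST /sales/_search?size=0
+{
+  "aggs": {
+    "by_type": {
+      "terms": { "field": "type" },
+      "aggs": {
+        "recent_sales": {
+          "top_hits": {
+            "from": 1,  <1>
+            "size": 2,  <2>
+            "sort": [ { "date": { "order": "desc" } } ]
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+1. `from: 1` skips the single most recent sale in each `type` bucket.
+2. `size: 2` returns the next two most recent sales per bucket.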
+ + +## Supported per hit features [_supported_per_hit_features] + +The top_hits aggregation returns regular search hits, because of this many per hit features can be supported: + +* [Highlighting](/reference/elasticsearch/rest-apis/highlighting.md) +* [Explain](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search) +* [Named queries](/reference/query-languages/query-dsl-bool-query.md#named-queries) +* [Search fields](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#search-fields-param) +* [Source filtering](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#source-filtering) +* [Stored fields](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#stored-fields) +* [Script fields](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#script-fields) +* [Doc value fields](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#docvalue-fields) +* [Include versions](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search) +* [Include Sequence Numbers and Primary Terms](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search) + +::::{important} +If you **only** need `docvalue_fields`, `size`, and `sort` then [Top metrics](/reference/data-analysis/aggregations/search-aggregations-metrics-top-metrics.md) might be a more efficient choice than the Top Hits Aggregation. +:::: + + +`top_hits` does not support the [`rescore`](/reference/elasticsearch/rest-apis/filter-search-results.md#rescore) parameter. Query rescoring applies only to search hits, not aggregation results. To change the scores used by aggregations, use a [`function_score`](/reference/query-languages/query-dsl-function-score-query.md) or [`script_score`](/reference/query-languages/query-dsl-script-score-query.md) query. + + +## Example [_example_6] + +In the following example we group the sales by type and per type we show the last sale. For each sale only the date and price fields are being included in the source. + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "top_tags": { + "terms": { + "field": "type", + "size": 3 + }, + "aggs": { + "top_sales_hits": { + "top_hits": { + "sort": [ + { + "date": { + "order": "desc" + } + } + ], + "_source": { + "includes": [ "date", "price" ] + }, + "size": 1 + } + } + } + } + } +} +``` + +Possible response: + +```console-result +{ + ... 
+ "aggregations": { + "top_tags": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "hat", + "doc_count": 3, + "top_sales_hits": { + "hits": { + "total" : { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [ + { + "_index": "sales", + "_id": "AVnNBmauCQpcRyxw6ChK", + "_source": { + "date": "2015/03/01 00:00:00", + "price": 200 + }, + "sort": [ + 1425168000000 + ], + "_score": null + } + ] + } + } + }, + { + "key": "t-shirt", + "doc_count": 3, + "top_sales_hits": { + "hits": { + "total" : { + "value": 3, + "relation": "eq" + }, + "max_score": null, + "hits": [ + { + "_index": "sales", + "_id": "AVnNBmauCQpcRyxw6ChL", + "_source": { + "date": "2015/03/01 00:00:00", + "price": 175 + }, + "sort": [ + 1425168000000 + ], + "_score": null + } + ] + } + } + }, + { + "key": "bag", + "doc_count": 1, + "top_sales_hits": { + "hits": { + "total" : { + "value": 1, + "relation": "eq" + }, + "max_score": null, + "hits": [ + { + "_index": "sales", + "_id": "AVnNBmatCQpcRyxw6ChH", + "_source": { + "date": "2015/01/01 00:00:00", + "price": 150 + }, + "sort": [ + 1420070400000 + ], + "_score": null + } + ] + } + } + } + ] + } + } +} +``` + + +## Field collapse example [_field_collapse_example] + +Field collapsing or result grouping is a feature that logically groups a result set into groups and per group returns top documents. The ordering of the groups is determined by the relevancy of the first document in a group. In Elasticsearch this can be implemented via a bucket aggregator that wraps a `top_hits` aggregator as sub-aggregator. + +In the example below we search across crawled webpages. For each webpage we store the body and the domain the webpage belong to. By defining a `terms` aggregator on the `domain` field we group the result set of webpages by domain. The `top_hits` aggregator is then defined as sub-aggregator, so that the top matching hits are collected per bucket. + +Also a `max` aggregator is defined which is used by the `terms` aggregator’s order feature to return the buckets by relevancy order of the most relevant document in a bucket. + +```console +POST /sales/_search +{ + "query": { + "match": { + "body": "elections" + } + }, + "aggs": { + "top_sites": { + "terms": { + "field": "domain", + "order": { + "top_hit": "desc" + } + }, + "aggs": { + "top_tags_hits": { + "top_hits": {} + }, + "top_hit" : { + "max": { + "script": { + "source": "_score" + } + } + } + } + } + } +} +``` + +At the moment the `max` (or `min`) aggregator is needed to make sure the buckets from the `terms` aggregator are ordered according to the score of the most relevant webpage per domain. Unfortunately the `top_hits` aggregator can’t be used in the `order` option of the `terms` aggregator yet. + + +## top_hits support in a nested or reverse_nested aggregator [_top_hits_support_in_a_nested_or_reverse_nested_aggregator] + +If the `top_hits` aggregator is wrapped in a `nested` or `reverse_nested` aggregator then nested hits are being returned. Nested hits are in a sense hidden mini documents that are part of regular document where in the mapping a nested field type has been configured. The `top_hits` aggregator has the ability to un-hide these documents if it is wrapped in a `nested` or `reverse_nested` aggregator. Read more about nested in the [nested type mapping](/reference/elasticsearch/mapping-reference/nested.md). + +If nested type has been configured a single document is actually indexed as multiple Lucene documents and they share the same id. 
In order to determine the identity of a nested hit there is more needed than just the id, so that is why nested hits also include their nested identity. The nested identity is kept under the `_nested` field in the search hit and includes the array field and the offset in the array field the nested hit belongs to. The offset is zero based. + +Let’s see how it works with a real sample. Considering the following mapping: + +```console +PUT /sales +{ + "mappings": { + "properties": { + "tags": { "type": "keyword" }, + "comments": { <1> + "type": "nested", + "properties": { + "username": { "type": "keyword" }, + "comment": { "type": "text" } + } + } + } + } +} +``` + +1. The `comments` is an array that holds nested documents under the `product` object. + + +And some documents: + +```console +PUT /sales/_doc/1?refresh +{ + "tags": [ "car", "auto" ], + "comments": [ + { "username": "baddriver007", "comment": "This car could have better brakes" }, + { "username": "dr_who", "comment": "Where's the autopilot? Can't find it" }, + { "username": "ilovemotorbikes", "comment": "This car has two extra wheels" } + ] +} +``` + +It’s now possible to execute the following `top_hits` aggregation (wrapped in a `nested` aggregation): + +```console +POST /sales/_search +{ + "query": { + "term": { "tags": "car" } + }, + "aggs": { + "by_sale": { + "nested": { + "path": "comments" + }, + "aggs": { + "by_user": { + "terms": { + "field": "comments.username", + "size": 1 + }, + "aggs": { + "by_nested": { + "top_hits": {} + } + } + } + } + } + } +} +``` + +Top hits response snippet with a nested hit, which resides in the first slot of array field `comments`: + +```console-result +{ + ... + "aggregations": { + "by_sale": { + "by_user": { + "buckets": [ + { + "key": "baddriver007", + "doc_count": 1, + "by_nested": { + "hits": { + "total" : { + "value": 1, + "relation": "eq" + }, + "max_score": 0.3616575, + "hits": [ + { + "_index": "sales", + "_id": "1", + "_nested": { + "field": "comments", <1> + "offset": 0 <2> + }, + "_score": 0.3616575, + "_source": { + "comment": "This car could have better brakes", <3> + "username": "baddriver007" + } + } + ] + } + } + } + ... + ] + } + } + } +} +``` + +1. Name of the array field containing the nested hit +2. Position if the nested hit in the containing array +3. Source of the nested hit + + +If `_source` is requested then just the part of the source of the nested object is returned, not the entire source of the document. Also stored fields on the **nested** inner object level are accessible via `top_hits` aggregator residing in a `nested` or `reverse_nested` aggregator. + +Only nested hits will have a `_nested` field in the hit, non nested (regular) hits will not have a `_nested` field. + +The information in `_nested` can also be used to parse the original source somewhere else if `_source` isn’t enabled. + +If there are multiple levels of nested object types defined in mappings then the `_nested` information can also be hierarchical in order to express the identity of nested hits that are two layers deep or more. + +In the example below a nested hit resides in the first slot of the field `nested_grand_child_field` which then resides in the second slow of the `nested_child_field` field: + +```js +... 
+"hits": { + "total" : { + "value": 2565, + "relation": "eq" + }, + "max_score": 1, + "hits": [ + { + "_index": "a", + "_id": "1", + "_score": 1, + "_nested" : { + "field" : "nested_child_field", + "offset" : 1, + "_nested" : { + "field" : "nested_grand_child_field", + "offset" : 0 + } + } + "_source": ... + }, + ... + ] +} +... +``` + + +## Use in pipeline aggregations [_use_in_pipeline_aggregations] + +`top_hits` can be used in pipeline aggregations that consume a single value per bucket, such as `bucket_selector` that applies per bucket filtering, similar to using a HAVING clause in SQL. This requires setting `size` to 1, and specifying the right path for the value to be passed to the wrapping aggregator. The latter can be a `_source`, a `_sort` or a `_score` value. For example: + +```console +POST /sales/_search?size=0 +{ + "aggs": { + "top_tags": { + "terms": { + "field": "type", + "size": 3 + }, + "aggs": { + "top_sales_hits": { + "top_hits": { + "sort": [ + { + "date": { + "order": "desc" + } + } + ], + "_source": { + "includes": [ "date", "price" ] + }, + "size": 1 + } + }, + "having.top_salary": { + "bucket_selector": { + "buckets_path": { + "tp": "top_sales_hits[_source.price]" + }, + "script": "params.tp < 180" + } + } + } + } + } +} +``` + +The `bucket_path` uses the `top_hits` name `top_sales_hits` and a keyword for the field providing the aggregate value, namely `_source` field `price` in the example above. Other options include `top_sales_hits[_sort]`, for filtering on the sort value `date` above, and `top_sales_hits[_score]`, for filtering on the score of the top hit. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-top-metrics.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-top-metrics.md new file mode 100644 index 0000000000000..f49eebba715a5 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-top-metrics.md @@ -0,0 +1,500 @@ +--- +navigation_title: "Top metrics" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-top-metrics.html +--- + +# Top metrics aggregation [search-aggregations-metrics-top-metrics] + + +The `top_metrics` aggregation selects metrics from the document with the largest or smallest "sort" value. For example, this gets the value of the `m` field on the document with the largest value of `s`: + +$$$search-aggregations-metrics-top-metrics-simple$$$ + +```console +POST /test/_bulk?refresh +{"index": {}} +{"s": 1, "m": 3.1415} +{"index": {}} +{"s": 2, "m": 1.0} +{"index": {}} +{"s": 3, "m": 2.71828} +POST /test/_search?filter_path=aggregations +{ + "aggs": { + "tm": { + "top_metrics": { + "metrics": {"field": "m"}, + "sort": {"s": "desc"} + } + } + } +} +``` + +Which returns: + +```js +{ + "aggregations": { + "tm": { + "top": [ {"sort": [3], "metrics": {"m": 2.718280076980591 } } ] + } + } +} +``` + +`top_metrics` is fairly similar to [`top_hits`](/reference/data-analysis/aggregations/search-aggregations-metrics-top-hits-aggregation.md) in spirit but because it is more limited it is able to do its job using less memory and is often faster. 
+ +## `sort` [_sort] + +The `sort` field in the metric request functions exactly the same as the `sort` field in the [search](/reference/elasticsearch/rest-apis/sort-search-results.md) request except: + +* It can’t be used on [binary](/reference/elasticsearch/mapping-reference/binary.md), [flattened](/reference/elasticsearch/mapping-reference/flattened.md), [ip](/reference/elasticsearch/mapping-reference/ip.md), [keyword](/reference/elasticsearch/mapping-reference/keyword.md), or [text](/reference/elasticsearch/mapping-reference/text.md) fields. +* It only supports a single sort value so which document wins ties is not specified. + +The metrics that the aggregation returns is the first hit that would be returned by the search request. So, + +`"sort": {"s": "desc"}` +: gets metrics from the document with the highest `s` + +`"sort": {"s": "asc"}` +: gets the metrics from the document with the lowest `s` + +`"sort": {"_geo_distance": {"location": "POINT (-78.6382 35.7796)"}}` +: gets metrics from the documents with `location` **closest** to `35.7796, -78.6382` + +`"sort": "_score"` +: gets metrics from the document with the highest score + + +## `metrics` [_metrics] + +`metrics` selects the fields of the "top" document to return. You can request a single metric with something like `"metrics": {"field": "m"}` or multiple metrics by requesting a list of metrics like `"metrics": [{"field": "m"}, {"field": "i"}`. + +`metrics.field` supports the following field types: + +* [`boolean`](/reference/elasticsearch/mapping-reference/boolean.md) +* [`ip`](/reference/elasticsearch/mapping-reference/ip.md) +* [keywords](/reference/elasticsearch/mapping-reference/keyword.md) +* [numbers](/reference/elasticsearch/mapping-reference/number.md) + +Except for keywords, [runtime fields](docs-content://manage-data/data-store/mapping/runtime-fields.md) for corresponding types are also supported. `metrics.field` doesn’t support fields with [array values](/reference/elasticsearch/mapping-reference/array.md). A `top_metric` aggregation on array values may return inconsistent results. + +The following example runs a `top_metrics` aggregation on several field types. + +$$$search-aggregations-metrics-top-metrics-list-of-metrics$$$ + +```console +PUT /test +{ + "mappings": { + "properties": { + "d": {"type": "date"} + } + } +} +POST /test/_bulk?refresh +{"index": {}} +{"s": 1, "m": 3.1415, "i": 1, "d": "2020-01-01T00:12:12Z", "t": "cat"} +{"index": {}} +{"s": 2, "m": 1.0, "i": 6, "d": "2020-01-02T00:12:12Z", "t": "dog"} +{"index": {}} +{"s": 3, "m": 2.71828, "i": -12, "d": "2019-12-31T00:12:12Z", "t": "chicken"} +POST /test/_search?filter_path=aggregations +{ + "aggs": { + "tm": { + "top_metrics": { + "metrics": [ + {"field": "m"}, + {"field": "i"}, + {"field": "d"}, + {"field": "t.keyword"} + ], + "sort": {"s": "desc"} + } + } + } +} +``` + +Which returns: + +```js +{ + "aggregations": { + "tm": { + "top": [ { + "sort": [3], + "metrics": { + "m": 2.718280076980591, + "i": -12, + "d": "2019-12-31T00:12:12.000Z", + "t.keyword": "chicken" + } + } ] + } + } +} +``` + + +## `missing` [_missing] + +The `missing` parameter defines how documents with a missing value are treated. By default, if any of the key components are missing, the entire document is ignored. It is possible to treat the missing components as if they had a value by using the `missing` parameter. 
+ +```console +PUT /my-index +{ + "mappings": { + "properties": { + "nr": { "type": "integer" }, + "state": { "type": "keyword" } <1> + } + } +} +POST /my-index/_bulk?refresh +{"index": {}} +{"nr": 1, "state": "started"} +{"index": {}} +{"nr": 2, "state": "stopped"} +{"index": {}} +{"nr": 3, "state": "N/A"} +{"index": {}} +{"nr": 4} <2> +POST /my-index/_search?filter_path=aggregations +{ + "aggs": { + "my_top_metrics": { + "top_metrics": { + "metrics": { + "field": "state", + "missing": "N/A"}, <3> + "sort": {"nr": "desc"} + } + } + } +} +``` + +1. If you want to use an aggregation on textual content, it must be a `keyword` type field or you must enable fielddata on that field. +2. This document has a missing `state` field value. +3. The `missing` parameter defines that if `state` field has a missing value, it should be treated as if it had the `N/A` value. + + +The request results in the following response: + +```console-result +{ + "aggregations": { + "my_top_metrics": { + "top": [ + { + "sort": [ + 4 + ], + "metrics": { + "state": "N/A" + } + } + ] + } + } +} +``` + + +## `size` [_size_2] + +`top_metrics` can return the top few document’s worth of metrics using the size parameter: + +$$$search-aggregations-metrics-top-metrics-size$$$ + +```console +POST /test/_bulk?refresh +{"index": {}} +{"s": 1, "m": 3.1415} +{"index": {}} +{"s": 2, "m": 1.0} +{"index": {}} +{"s": 3, "m": 2.71828} +POST /test/_search?filter_path=aggregations +{ + "aggs": { + "tm": { + "top_metrics": { + "metrics": {"field": "m"}, + "sort": {"s": "desc"}, + "size": 3 + } + } + } +} +``` + +Which returns: + +```js +{ + "aggregations": { + "tm": { + "top": [ + {"sort": [3], "metrics": {"m": 2.718280076980591 } }, + {"sort": [2], "metrics": {"m": 1.0 } }, + {"sort": [1], "metrics": {"m": 3.1414999961853027 } } + ] + } + } +} +``` + +The default `size` is 1. The maximum default size is `10` because the aggregation’s working storage is "dense", meaning we allocate `size` slots for every bucket. `10` is a **very** conservative default maximum and you can raise it if you need to by changing the `top_metrics_max_size` index setting. But know that large sizes can take a fair bit of memory, especially if they are inside of an aggregation which makes many buckes like a large [terms aggregation](#search-aggregations-metrics-top-metrics-example-terms). If you till want to raise it, use something like: + +```console +PUT /test/_settings +{ + "top_metrics_max_size": 100 +} +``` + +::::{note} +If `size` is more than `1` the `top_metrics` aggregation can’t be the **target** of a sort. +:::: + + + +## Examples [_examples_3] + +### Use with terms [search-aggregations-metrics-top-metrics-example-terms] + +This aggregation should be quite useful inside of [`terms`](/reference/data-analysis/aggregations/search-aggregations-bucket-terms-aggregation.md) aggregation, to, say, find the last value reported by each server. 
+ +$$$search-aggregations-metrics-top-metrics-terms$$$ + +```console +PUT /node +{ + "mappings": { + "properties": { + "ip": {"type": "ip"}, + "date": {"type": "date"} + } + } +} +POST /node/_bulk?refresh +{"index": {}} +{"ip": "192.168.0.1", "date": "2020-01-01T01:01:01", "m": 1} +{"index": {}} +{"ip": "192.168.0.1", "date": "2020-01-01T02:01:01", "m": 2} +{"index": {}} +{"ip": "192.168.0.2", "date": "2020-01-01T02:01:01", "m": 3} +POST /node/_search?filter_path=aggregations +{ + "aggs": { + "ip": { + "terms": { + "field": "ip" + }, + "aggs": { + "tm": { + "top_metrics": { + "metrics": {"field": "m"}, + "sort": {"date": "desc"} + } + } + } + } + } +} +``` + +Which returns: + +```js +{ + "aggregations": { + "ip": { + "buckets": [ + { + "key": "192.168.0.1", + "doc_count": 2, + "tm": { + "top": [ {"sort": ["2020-01-01T02:01:01.000Z"], "metrics": {"m": 2 } } ] + } + }, + { + "key": "192.168.0.2", + "doc_count": 1, + "tm": { + "top": [ {"sort": ["2020-01-01T02:01:01.000Z"], "metrics": {"m": 3 } } ] + } + } + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0 + } + } +} +``` + +Unlike `top_hits`, you can sort buckets by the results of this metric: + +```console +POST /node/_search?filter_path=aggregations +{ + "aggs": { + "ip": { + "terms": { + "field": "ip", + "order": {"tm.m": "desc"} + }, + "aggs": { + "tm": { + "top_metrics": { + "metrics": {"field": "m"}, + "sort": {"date": "desc"} + } + } + } + } + } +} +``` + +Which returns: + +```js +{ + "aggregations": { + "ip": { + "buckets": [ + { + "key": "192.168.0.2", + "doc_count": 1, + "tm": { + "top": [ {"sort": ["2020-01-01T02:01:01.000Z"], "metrics": {"m": 3 } } ] + } + }, + { + "key": "192.168.0.1", + "doc_count": 2, + "tm": { + "top": [ {"sort": ["2020-01-01T02:01:01.000Z"], "metrics": {"m": 2 } } ] + } + } + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0 + } + } +} +``` + + +### Mixed sort types [_mixed_sort_types] + +Sorting `top_metrics` by a field that has different types across different indices producs somewhat surprising results: floating point fields are always sorted independently of whole numbered fields. + +$$$search-aggregations-metrics-top-metrics-mixed-sort$$$ + +```console +POST /test/_bulk?refresh +{"index": {"_index": "test1"}} +{"s": 1, "m": 3.1415} +{"index": {"_index": "test1"}} +{"s": 2, "m": 1} +{"index": {"_index": "test2"}} +{"s": 3.1, "m": 2.71828} +POST /test*/_search?filter_path=aggregations +{ + "aggs": { + "tm": { + "top_metrics": { + "metrics": {"field": "m"}, + "sort": {"s": "asc"} + } + } + } +} +``` + +Which returns: + +```js +{ + "aggregations": { + "tm": { + "top": [ {"sort": [3.0999999046325684], "metrics": {"m": 2.718280076980591 } } ] + } + } +} +``` + +While this is better than an error it **probably** isn’t what you were going for. 
While it does lose some precision, you can explicitly cast the whole number fields to floating points with something like: + +```console +POST /test*/_search?filter_path=aggregations +{ + "aggs": { + "tm": { + "top_metrics": { + "metrics": {"field": "m"}, + "sort": {"s": {"order": "asc", "numeric_type": "double"}} + } + } + } +} +``` + +Which returns the much more expected: + +```js +{ + "aggregations": { + "tm": { + "top": [ {"sort": [1.0], "metrics": {"m": 3.1414999961853027 } } ] + } + } +} +``` + + +### Use in pipeline aggregations [_use_in_pipeline_aggregations_2] + +`top_metrics` can be used in pipeline aggregations that consume a single value per bucket, such as `bucket_selector` that applies per bucket filtering, similar to using a HAVING clause in SQL. This requires setting `size` to 1, and specifying the right path for the (single) metric to be passed to the wrapping aggregator. For example: + +```console +POST /test*/_search?filter_path=aggregations +{ + "aggs": { + "ip": { + "terms": { + "field": "ip" + }, + "aggs": { + "tm": { + "top_metrics": { + "metrics": {"field": "m"}, + "sort": {"s": "desc"}, + "size": 1 + } + }, + "having_tm": { + "bucket_selector": { + "buckets_path": { + "top_m": "tm[m]" + }, + "script": "params.top_m < 1000" + } + } + } + } + } +} +``` + +The `bucket_path` uses the `top_metrics` name `tm` and a keyword for the metric providing the aggregate value, namely `m`. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-ttest-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-ttest-aggregation.md new file mode 100644 index 0000000000000..95970a6a3584f --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-ttest-aggregation.md @@ -0,0 +1,175 @@ +--- +navigation_title: "T-test" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-ttest-aggregation.html +--- + +# T-test aggregation [search-aggregations-metrics-ttest-aggregation] + + +A `t_test` metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a Student’s t-distribution under the null hypothesis on numeric values extracted from the aggregated documents. In practice, this will tell you if the difference between two population means are statistically significant and did not occur by chance alone. + +## Syntax [_syntax_6] + +A `t_test` aggregation looks like this in isolation: + +```js +{ + "t_test": { + "a": "value_before", + "b": "value_after", + "type": "paired" + } +} +``` + +Assuming that we have a record of node start up times before and after upgrade, let’s look at a t-test to see if upgrade affected the node start up time in a meaningful way. + +```console +GET node_upgrade/_search +{ + "size": 0, + "aggs": { + "startup_time_ttest": { + "t_test": { + "a": { "field": "startup_time_before" }, <1> + "b": { "field": "startup_time_after" }, <2> + "type": "paired" <3> + } + } + } +} +``` + +1. The field `startup_time_before` must be a numeric field. +2. The field `startup_time_after` must be a numeric field. +3. Since we have data from the same nodes, we are using paired t-test. + + +The response will return the p-value or probability value for the test. It is the probability of obtaining results at least as extreme as the result processed by the aggregation, assuming that the null hypothesis is correct (which means there is no difference between population means). 
Smaller p-value means the null hypothesis is more likely to be incorrect and population means are indeed different. + +```console-result +{ + ... + + "aggregations": { + "startup_time_ttest": { + "value": 0.1914368843365979 <1> + } + } +} +``` + +1. The p-value. + + + +## T-Test Types [_t_test_types] + +The `t_test` aggregation supports unpaired and paired two-sample t-tests. The type of the test can be specified using the `type` parameter: + +`"type": "paired"` +: performs paired t-test + +`"type": "homoscedastic"` +: performs two-sample equal variance test + +`"type": "heteroscedastic"` +: performs two-sample unequal variance test (this is default) + + +## Filters [_filters] + +It is also possible to run unpaired t-test on different sets of records using filters. For example, if we want to test the difference of startup times before upgrade between two different groups of nodes, we use the same field `startup_time_before` by separate groups of nodes using terms filters on the group name field: + +```console +GET node_upgrade/_search +{ + "size": 0, + "aggs": { + "startup_time_ttest": { + "t_test": { + "a": { + "field": "startup_time_before", <1> + "filter": { + "term": { + "group": "A" <2> + } + } + }, + "b": { + "field": "startup_time_before", <3> + "filter": { + "term": { + "group": "B" <4> + } + } + }, + "type": "heteroscedastic" <5> + } + } + } +} +``` + +1. The field `startup_time_before` must be a numeric field. +2. Any query that separates two groups can be used here. +3. We are using the same field +4. but we are using different filters. +5. Since we have data from different nodes, we cannot use paired t-test. + + +```console-result +{ + ... + + "aggregations": { + "startup_time_ttest": { + "value": 0.2981858007281437 <1> + } + } +} +``` + +1. The p-value. + + +Populations don’t have to be in the same index. If data sets are located in different indices, the term filter on the [`_index`](/reference/elasticsearch/mapping-reference/mapping-index-field.md) field can be used to select populations. + + +## Script [_script_15] + +If you need to run the `t_test` on values that aren’t represented cleanly by a field you should, run the aggregation on a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). 
For example, if you want to adjust out load times for the before values: + +```console +GET node_upgrade/_search +{ + "size": 0, + "runtime_mappings": { + "startup_time_before.adjusted": { + "type": "long", + "script": { + "source": "emit(doc['startup_time_before'].value - params.adjustment)", + "params": { + "adjustment": 10 + } + } + } + }, + "aggs": { + "startup_time_ttest": { + "t_test": { + "a": { + "field": "startup_time_before.adjusted" + }, + "b": { + "field": "startup_time_after" + }, + "type": "paired" + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-valuecount-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-valuecount-aggregation.md new file mode 100644 index 0000000000000..318d131006299 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-valuecount-aggregation.md @@ -0,0 +1,118 @@ +--- +navigation_title: "Value count" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-valuecount-aggregation.html +--- + +# Value count aggregation [search-aggregations-metrics-valuecount-aggregation] + + +A `single-value` metrics aggregation that counts the number of values that are extracted from the aggregated documents. These values can be extracted either from specific fields in the documents, or be generated by a provided script. Typically, this aggregator will be used in conjunction with other single-value aggregations. For example, when computing the `avg` one might be interested in the number of values the average is computed over. + +`value_count` does not de-duplicate values, so even if a field has duplicates each value will be counted individually. + +```console +POST /sales/_search?size=0 +{ + "aggs" : { + "types_count" : { "value_count" : { "field" : "type" } } + } +} +``` + +Response: + +```console-result +{ + ... + "aggregations": { + "types_count": { + "value": 7 + } + } +} +``` + +The name of the aggregation (`types_count` above) also serves as the key by which the aggregation result can be retrieved from the returned response. + +## Script [_script_16] + +If you need to count something more complex than the values in a single field you should run the aggregation on a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). + +```console +POST /sales/_search +{ + "size": 0, + "runtime_mappings": { + "tags": { + "type": "keyword", + "script": """ + emit(doc['type'].value); + if (doc['promoted'].value) { + emit('hot'); + } + """ + } + }, + "aggs": { + "tags_count": { + "value_count": { + "field": "tags" + } + } + } +} +``` + + +## Histogram fields [search-aggregations-metrics-valuecount-aggregation-histogram-fields] + +When the `value_count` aggregation is computed on [histogram fields](/reference/elasticsearch/mapping-reference/histogram.md), the result of the aggregation is the sum of all numbers in the `counts` array of the histogram. 
+ +For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks: + +```console +PUT metrics_index/_doc/1 +{ + "network.name" : "net-1", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], + "counts" : [3, 7, 23, 12, 6] <1> + } +} + +PUT metrics_index/_doc/2 +{ + "network.name" : "net-2", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], + "counts" : [8, 17, 8, 7, 6] <1> + } +} + +POST /metrics_index/_search?size=0 +{ + "aggs": { + "total_requests": { + "value_count": { "field": "latency_histo" } + } + } +} +``` + +1. For each histogram field the `value_count` aggregation will sum all numbers in the `counts` array. + +Eventually, the aggregation will add all values for all histograms and return the following result: + +```console-result +{ + ... + "aggregations": { + "total_requests": { + "value": 97 + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-metrics-weight-avg-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-weight-avg-aggregation.md new file mode 100644 index 0000000000000..ea83567f7fdff --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-metrics-weight-avg-aggregation.md @@ -0,0 +1,209 @@ +--- +navigation_title: "Weighted avg" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-weight-avg-aggregation.html +--- + +# Weighted avg aggregation [search-aggregations-metrics-weight-avg-aggregation] + + +A `single-value` metrics aggregation that computes the weighted average of numeric values that are extracted from the aggregated documents. These values can be extracted either from specific numeric fields in the documents. + +When calculating a regular average, each datapoint has an equal "weight" …​ it contributes equally to the final value. Weighted averages, on the other hand, weight each datapoint differently. The amount that each datapoint contributes to the final value is extracted from the document. + +As a formula, a weighted average is the `∑(value * weight) / ∑(weight)` + +A regular average can be thought of as a weighted average where every value has an implicit weight of `1`. 
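+
+For example (the numbers here are purely illustrative), two grades of `70` and `90` with weights of `3` and `1` produce a weighted average of `((70 * 3) + (90 * 1)) / (3 + 1) == 75`, whereas the regular average of the same grades would be `80`.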
+ +$$$weighted-avg-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `value` | The configuration for the field or script that provides the values | Required | | +| `weight` | The configuration for the field or script that provides the weights | Required | | +| `format` | The numeric response formatter | Optional | | + +The `value` and `weight` objects have per-field specific configuration: + +$$$value-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `field` | The field that values should be extracted from | Required | | +| `missing` | A value to use if the field is missing entirely | Optional | | + +$$$weight-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `field` | The field that weights should be extracted from | Required | | +| `missing` | A weight to use if the field is missing entirely | Optional | | + +## Examples [_examples_4] + +If our documents have a `"grade"` field that holds a 0-100 numeric score, and a `"weight"` field which holds an arbitrary numeric weight, we can calculate the weighted average using: + +```console +POST /exams/_search +{ + "size": 0, + "aggs": { + "weighted_grade": { + "weighted_avg": { + "value": { + "field": "grade" + }, + "weight": { + "field": "weight" + } + } + } + } +} +``` + +Which yields a response like: + +```console-result +{ + ... + "aggregations": { + "weighted_grade": { + "value": 70.0 + } + } +} +``` + +While multiple values-per-field are allowed, only one weight is allowed. If the aggregation encounters a document that has more than one weight (e.g. the weight field is a multi-valued field) it will abort the search. If you have this situation, you should build a [Runtime field](#search-aggregations-metrics-weight-avg-aggregation-runtime-field) to combine those values into a single weight. + +This single weight will be applied independently to each value extracted from the `value` field. + +This example show how a single document with multiple values will be averaged with a single weight: + +```console +POST /exams/_doc?refresh +{ + "grade": [1, 2, 3], + "weight": 2 +} + +POST /exams/_search +{ + "size": 0, + "aggs": { + "weighted_grade": { + "weighted_avg": { + "value": { + "field": "grade" + }, + "weight": { + "field": "weight" + } + } + } + } +} +``` + +The three values (`1`, `2`, and `3`) will be included as independent values, all with the weight of `2`: + +```console-result +{ + ... + "aggregations": { + "weighted_grade": { + "value": 2.0 + } + } +} +``` + +The aggregation returns `2.0` as the result, which matches what we would expect when calculating by hand: `((1*2) + (2*2) + (3*2)) / (2+2+2) == 2` + + +## Runtime field [search-aggregations-metrics-weight-avg-aggregation-runtime-field] + +If you have to sum or weigh values that don’t quite line up with the indexed values, run the aggregation on a [runtime field](docs-content://manage-data/data-store/mapping/runtime-fields.md). 
+ +```console +POST /exams/_doc?refresh +{ + "grade": 100, + "weight": [2, 3] +} +POST /exams/_doc?refresh +{ + "grade": 80, + "weight": 3 +} + +POST /exams/_search?filter_path=aggregations +{ + "size": 0, + "runtime_mappings": { + "weight.combined": { + "type": "double", + "script": """ + double s = 0; + for (double w : doc['weight']) { + s += w; + } + emit(s); + """ + } + }, + "aggs": { + "weighted_grade": { + "weighted_avg": { + "value": { + "script": "doc.grade.value + 1" + }, + "weight": { + "field": "weight.combined" + } + } + } + } +} +``` + +Which should look like: + +```console-result +{ + "aggregations": { + "weighted_grade": { + "value": 93.5 + } + } +} +``` + + +## Missing values [_missing_values_4] + +By default, the aggregation excludes documents with a missing or `null` value for the `value` or `weight` field. Use the `missing` parameter to specify a default value for these documents instead. + +```console +POST /exams/_search +{ + "size": 0, + "aggs": { + "weighted_grade": { + "weighted_avg": { + "value": { + "field": "grade", + "missing": 2 + }, + "weight": { + "field": "weight", + "missing": 3 + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-avg-bucket-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-avg-bucket-aggregation.md new file mode 100644 index 0000000000000..7fe9adeff60b9 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-avg-bucket-aggregation.md @@ -0,0 +1,128 @@ +--- +navigation_title: "Average bucket" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-avg-bucket-aggregation.html +--- + +# Average bucket aggregation [search-aggregations-pipeline-avg-bucket-aggregation] + + +A sibling pipeline aggregation which calculates the mean value of a specified metric in a sibling aggregation. The specified metric must be numeric and the sibling aggregation must be a multi-bucket aggregation. + +## Syntax [avg-bucket-agg-syntax] + +```js +"avg_bucket": { + "buckets_path": "sales_per_month>sales", + "gap_policy": "skip", + "format": "#,##0.00;(#,##0.00)" +} +``` + + +## Parameters [avg-bucket-params] + +`buckets_path` +: (Required, string) Path to the buckets to average. For syntax, see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax). + +`gap_policy` +: (Optional, string) Policy to apply when gaps are found in the data. For valid values, see [Dealing with gaps in the data](/reference/data-analysis/aggregations/pipeline.md#gap-policy). Defaults to `skip`. + +`format` +: (Optional, string) [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for the output value. If specified, the formatted value is returned in the aggregation’s `value_as_string` property. + + +## Response body [avg-bucket-agg-response] + +`value` +: (float) Mean average value for the metric specified in `buckets_path`. + +`value_as_string` +: (string) Formatted output value for the aggregation. This property is only provided if a `format` is specified in the request. 
+ + +## Example [avg-bucket-agg-ex] + +The following `avg_monthly_sales` aggregation uses `avg_bucket` to calculate average sales per month: + +```console +POST _search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + } + } + }, + "avg_monthly_sales": { +// tag::avg-bucket-agg-syntax[] <1> + "avg_bucket": { + "buckets_path": "sales_per_month>sales", + "gap_policy": "skip", + "format": "#,##0.00;(#,##0.00)" + } +// end::avg-bucket-agg-syntax[] <2> + } + } +} +``` + +1. Start of the `avg_bucket` configuration. Comment is not part of the example. +2. End of the `avg_bucket` configuration. Comment is not part of the example. + + +The request returns the following response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "sales": { + "value": 550.0 + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "sales": { + "value": 60.0 + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "sales": { + "value": 375.0 + } + } + ] + }, + "avg_monthly_sales": { + "value": 328.33333333333333, + "value_as_string": "328.33" + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-bucket-script-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-bucket-script-aggregation.md new file mode 100644 index 0000000000000..20929b8e63f82 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-bucket-script-aggregation.md @@ -0,0 +1,155 @@ +--- +navigation_title: "Bucket script" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-bucket-script-aggregation.html +--- + +# Bucket script aggregation [search-aggregations-pipeline-bucket-script-aggregation] + + +A parent pipeline aggregation which executes a script which can perform per bucket computations on specified metrics in the parent multi-bucket aggregation. The specified metric must be numeric and the script must return a numeric value. + +## Syntax [bucket-script-agg-syntax] + +A `bucket_script` aggregation looks like this in isolation: + +```js +{ + "bucket_script": { + "buckets_path": { + "my_var1": "the_sum", <1> + "my_var2": "the_value_count" + }, + "script": "params.my_var1 / params.my_var2" + } +} +``` + +1. Here, `my_var1` is the name of the variable for this buckets path to use in the script, `the_sum` is the path to the metrics to use for that variable. + + +$$$bucket-script-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `script` | The script to run for this aggregation. The script can be inline, file or indexed. 
(see [Scripting](docs-content://explore-analyze/scripting.md)for more details) | Required | | +| `buckets_path` | A map of script variables and their associated path to the buckets we wish to use for the variable(see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details) | Required | | +| `gap_policy` | The policy to apply when gaps are found in the data (see [Dealing with gaps in the data](/reference/data-analysis/aggregations/pipeline.md#gap-policy) for more details) | Optional | `skip` | +| `format` | [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for theoutput value. If specified, the formatted value is returned in the aggregation’s`value_as_string` property | Optional | `null` | + +The following snippet calculates the ratio percentage of t-shirt sales compared to total sales each month: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "total_sales": { + "sum": { + "field": "price" + } + }, + "t-shirts": { + "filter": { + "term": { + "type": "t-shirt" + } + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + } + } + }, + "t-shirt-percentage": { + "bucket_script": { + "buckets_path": { + "tShirtSales": "t-shirts>sales", + "totalSales": "total_sales" + }, + "script": "params.tShirtSales / params.totalSales * 100" + } + } + } + } + } +} +``` + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "total_sales": { + "value": 550.0 + }, + "t-shirts": { + "doc_count": 1, + "sales": { + "value": 200.0 + } + }, + "t-shirt-percentage": { + "value": 36.36363636363637 + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "total_sales": { + "value": 60.0 + }, + "t-shirts": { + "doc_count": 1, + "sales": { + "value": 10.0 + } + }, + "t-shirt-percentage": { + "value": 16.666666666666664 + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "total_sales": { + "value": 375.0 + }, + "t-shirts": { + "doc_count": 1, + "sales": { + "value": 175.0 + } + }, + "t-shirt-percentage": { + "value": 46.666666666666664 + } + } + ] + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-bucket-selector-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-bucket-selector-aggregation.md new file mode 100644 index 0000000000000..04e39708f048b --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-bucket-selector-aggregation.md @@ -0,0 +1,112 @@ +--- +navigation_title: "Bucket selector" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-bucket-selector-aggregation.html +--- + +# Bucket selector aggregation [search-aggregations-pipeline-bucket-selector-aggregation] + + +A parent pipeline aggregation which executes a script which determines whether the current bucket will be retained in the parent multi-bucket aggregation. The specified metric must be numeric and the script must return a boolean value. 
If the script language is `expression` then a numeric return value is permitted. In this case 0.0 will be evaluated as `false` and all other values will evaluate to true. + +::::{note} +The bucket_selector aggregation, like all pipeline aggregations, executes after all other sibling aggregations. This means that using the bucket_selector aggregation to filter the returned buckets in the response does not save on execution time running the aggregations. +:::: + + +## Syntax [_syntax_9] + +A `bucket_selector` aggregation looks like this in isolation: + +```js +{ + "bucket_selector": { + "buckets_path": { + "my_var1": "the_sum", <1> + "my_var2": "the_value_count" + }, + "script": "params.my_var1 > params.my_var2" + } +} +``` + +1. Here, `my_var1` is the name of the variable for this buckets path to use in the script, `the_sum` is the path to the metrics to use for that variable. + + +$$$bucket-selector-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `script` | The script to run for this aggregation. The script can be inline, file or indexed. (see [Scripting](docs-content://explore-analyze/scripting.md)for more details) | Required | | +| `buckets_path` | A map of script variables and their associated path to the buckets we wish to use for the variable(see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details) | Required | | +| `gap_policy` | The policy to apply when gaps are found in the data (see [Dealing with gaps in the data](/reference/data-analysis/aggregations/pipeline.md#gap-policy) for more details) | Optional | `skip` | + +The following snippet only retains buckets where the total sales for the month is more than 200: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "total_sales": { + "sum": { + "field": "price" + } + }, + "sales_bucket_filter": { + "bucket_selector": { + "buckets_path": { + "totalSales": "total_sales" + }, + "script": "params.totalSales > 200" + } + } + } + } + } +} +``` + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "total_sales": { + "value": 550.0 + } + },<1> + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "total_sales": { + "value": 375.0 + } + } + ] + } + } +} +``` + +1. Bucket for `2015/02/01 00:00:00` has been removed as its total sales was less than 200 + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-bucket-sort-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-bucket-sort-aggregation.md new file mode 100644 index 0000000000000..5d702cbc9aaab --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-bucket-sort-aggregation.md @@ -0,0 +1,176 @@ +--- +navigation_title: "Bucket sort" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-bucket-sort-aggregation.html +--- + +# Bucket sort aggregation [search-aggregations-pipeline-bucket-sort-aggregation] + + +A parent pipeline aggregation which sorts the buckets of its parent multi-bucket aggregation. 
Zero or more sort fields may be specified together with the corresponding sort order. Each bucket may be sorted based on its `_key`, `_count` or its sub-aggregations. In addition, parameters `from` and `size` may be set in order to truncate the result buckets. + +::::{note} +The `bucket_sort` aggregation, like all pipeline aggregations, is executed after all other non-pipeline aggregations. This means the sorting only applies to whatever buckets are already returned from the parent aggregation. For example, if the parent aggregation is `terms` and its `size` is set to `10`, the `bucket_sort` will only sort over those 10 returned term buckets. +:::: + + +## Syntax [_syntax_10] + +A `bucket_sort` aggregation looks like this in isolation: + +```js +{ + "bucket_sort": { + "sort": [ + { "sort_field_1": { "order": "asc" } }, <1> + { "sort_field_2": { "order": "desc" } }, + "sort_field_3" + ], + "from": 1, + "size": 3 + } +} +``` + +1. Here, `sort_field_1` is the bucket path to the variable to be used as the primary sort and its order is ascending. + + +$$$bucket-sort-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `sort` | The list of fields to sort on. See [`sort`](/reference/elasticsearch/rest-apis/sort-search-results.md) for more details. | Optional | | +| `from` | Buckets in positions prior to the set value will be truncated. | Optional | `0` | +| `size` | The number of buckets to return. Defaults to all buckets of the parent aggregation. | Optional | | +| `gap_policy` | The policy to apply when gaps are found in the data (see [Dealing with gaps in the data](/reference/data-analysis/aggregations/pipeline.md#gap-policy) for more details) | Optional | `skip` | + +The following snippet returns the buckets corresponding to the 3 months with the highest total sales in descending order: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "total_sales": { + "sum": { + "field": "price" + } + }, + "sales_bucket_sort": { + "bucket_sort": { + "sort": [ + { "total_sales": { "order": "desc" } } <1> + ], + "size": 3 <2> + } + } + } + } + } +} +``` + +1. `sort` is set to use the values of `total_sales` in descending order +2. `size` is set to `3` meaning only the top 3 months in `total_sales` will be returned + + +And the following may be the response: + +```console-result +{ + "took": 82, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "total_sales": { + "value": 550.0 + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "total_sales": { + "value": 375.0 + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "total_sales": { + "value": 60.0 + } + } + ] + } + } +} +``` + + +## Truncating without sorting [_truncating_without_sorting] + +It is also possible to use this aggregation in order to truncate the result buckets without doing any sorting. To do so, just use the `from` and/or `size` parameters without specifying `sort`. 
+ +The following example simply truncates the result so that only the second bucket is returned: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "bucket_truncate": { + "bucket_sort": { + "from": 1, + "size": 1 + } + } + } + } + } +} +``` + +Response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2 + } + ] + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-cumulative-cardinality-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-cumulative-cardinality-aggregation.md new file mode 100644 index 0000000000000..98834c9002f2a --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-cumulative-cardinality-aggregation.md @@ -0,0 +1,212 @@ +--- +navigation_title: "Cumulative cardinality" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-cumulative-cardinality-aggregation.html +--- + +# Cumulative cardinality aggregation [search-aggregations-pipeline-cumulative-cardinality-aggregation] + + +A parent pipeline aggregation which calculates the Cumulative Cardinality in a parent histogram (or date_histogram) aggregation. The specified metric must be a cardinality aggregation and the enclosing histogram must have `min_doc_count` set to `0` (default for `histogram` aggregations). + +The `cumulative_cardinality` agg is useful for finding "total new items", like the number of new visitors to your website each day. A regular cardinality aggregation will tell you how many unique visitors came each day, but doesn’t differentiate between "new" or "repeat" visitors. The Cumulative Cardinality aggregation can be used to determine how many of each day’s unique visitors are "new". + +## Syntax [_syntax_12] + +A `cumulative_cardinality` aggregation looks like this in isolation: + +```js +{ + "cumulative_cardinality": { + "buckets_path": "my_cardinality_agg" + } +} +``` + +$$$cumulative-cardinality-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `buckets_path` | The path to the cardinality aggregation we wish to find the cumulative cardinality for (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details) | Required | | +| `format` | [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for theoutput value. If specified, the formatted value is returned in the aggregation’s`value_as_string` property | Optional | `null` | + +The following snippet calculates the cumulative cardinality of the total daily `users`: + +```console +GET /user_hits/_search +{ + "size": 0, + "aggs": { + "users_per_day": { + "date_histogram": { + "field": "timestamp", + "calendar_interval": "day" + }, + "aggs": { + "distinct_users": { + "cardinality": { + "field": "user_id" + } + }, + "total_new_users": { + "cumulative_cardinality": { + "buckets_path": "distinct_users" <1> + } + } + } + } + } +} +``` + +1. 
`buckets_path` instructs this aggregation to use the output of the `distinct_users` aggregation for the cumulative cardinality + + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "users_per_day": { + "buckets": [ + { + "key_as_string": "2019-01-01T00:00:00.000Z", + "key": 1546300800000, + "doc_count": 2, + "distinct_users": { + "value": 2 + }, + "total_new_users": { + "value": 2 + } + }, + { + "key_as_string": "2019-01-02T00:00:00.000Z", + "key": 1546387200000, + "doc_count": 2, + "distinct_users": { + "value": 2 + }, + "total_new_users": { + "value": 3 + } + }, + { + "key_as_string": "2019-01-03T00:00:00.000Z", + "key": 1546473600000, + "doc_count": 3, + "distinct_users": { + "value": 3 + }, + "total_new_users": { + "value": 4 + } + } + ] + } + } +} +``` + +Note how the second day, `2019-01-02`, has two distinct users but the `total_new_users` metric generated by the cumulative pipeline agg only increments to three. This means that only one of the two users that day were new, the other had already been seen in the previous day. This happens again on the third day, where only one of three users is completely new. + + +## Incremental cumulative cardinality [_incremental_cumulative_cardinality] + +The `cumulative_cardinality` agg will show you the total, distinct count since the beginning of the time period being queried. Sometimes, however, it is useful to see the "incremental" count. Meaning, how many new users are added each day, rather than the total cumulative count. + +This can be accomplished by adding a `derivative` aggregation to our query: + +```console +GET /user_hits/_search +{ + "size": 0, + "aggs": { + "users_per_day": { + "date_histogram": { + "field": "timestamp", + "calendar_interval": "day" + }, + "aggs": { + "distinct_users": { + "cardinality": { + "field": "user_id" + } + }, + "total_new_users": { + "cumulative_cardinality": { + "buckets_path": "distinct_users" + } + }, + "incremental_new_users": { + "derivative": { + "buckets_path": "total_new_users" + } + } + } + } + } +} +``` + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "users_per_day": { + "buckets": [ + { + "key_as_string": "2019-01-01T00:00:00.000Z", + "key": 1546300800000, + "doc_count": 2, + "distinct_users": { + "value": 2 + }, + "total_new_users": { + "value": 2 + } + }, + { + "key_as_string": "2019-01-02T00:00:00.000Z", + "key": 1546387200000, + "doc_count": 2, + "distinct_users": { + "value": 2 + }, + "total_new_users": { + "value": 3 + }, + "incremental_new_users": { + "value": 1.0 + } + }, + { + "key_as_string": "2019-01-03T00:00:00.000Z", + "key": 1546473600000, + "doc_count": 3, + "distinct_users": { + "value": 3 + }, + "total_new_users": { + "value": 4 + }, + "incremental_new_users": { + "value": 1.0 + } + } + ] + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-cumulative-sum-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-cumulative-sum-aggregation.md new file mode 100644 index 0000000000000..5c4d5504c08f9 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-cumulative-sum-aggregation.md @@ -0,0 +1,113 @@ +--- +navigation_title: "Cumulative sum" +mapped_pages: + - 
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-cumulative-sum-aggregation.html +--- + +# Cumulative sum aggregation [search-aggregations-pipeline-cumulative-sum-aggregation] + + +A parent pipeline aggregation which calculates the cumulative sum of a specified metric in a parent histogram (or date_histogram) aggregation. The specified metric must be numeric and the enclosing histogram must have `min_doc_count` set to `0` (default for `histogram` aggregations). + +## Syntax [_syntax_13] + +A `cumulative_sum` aggregation looks like this in isolation: + +```js +{ + "cumulative_sum": { + "buckets_path": "the_sum" + } +} +``` + +$$$cumulative-sum-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `buckets_path` | The path to the buckets we wish to find the cumulative sum for (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details) | Required | | +| `format` | [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for theoutput value. If specified, the formatted value is returned in the aggregation’s`value_as_string` property | Optional | `null` | + +The following snippet calculates the cumulative sum of the total monthly `sales`: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + }, + "cumulative_sales": { + "cumulative_sum": { + "buckets_path": "sales" <1> + } + } + } + } + } +} +``` + +1. `buckets_path` instructs this cumulative sum aggregation to use the output of the `sales` aggregation for the cumulative sum + + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "sales": { + "value": 550.0 + }, + "cumulative_sales": { + "value": 550.0 + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "sales": { + "value": 60.0 + }, + "cumulative_sales": { + "value": 610.0 + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "sales": { + "value": 375.0 + }, + "cumulative_sales": { + "value": 985.0 + } + } + ] + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-derivative-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-derivative-aggregation.md new file mode 100644 index 0000000000000..3a1e40afa66e5 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-derivative-aggregation.md @@ -0,0 +1,299 @@ +--- +navigation_title: "Derivative" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-derivative-aggregation.html +--- + +# Derivative aggregation [search-aggregations-pipeline-derivative-aggregation] + + +A parent pipeline aggregation which calculates the derivative of a specified metric in a parent histogram (or date_histogram) aggregation. The specified metric must be numeric and the enclosing histogram must have `min_doc_count` set to `0` (default for `histogram` aggregations). 
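+
+In the simplest case the reported derivative is just the difference between a bucket’s metric value and that of the preceding bucket; in the first-order example below, for instance, the February value is 60 − 550 = −490.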
+ +## Syntax [_syntax_14] + +A `derivative` aggregation looks like this in isolation: + +```js +"derivative": { + "buckets_path": "the_sum" +} +``` + +$$$derivative-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `buckets_path` | The path to the buckets we wish to find the derivative for (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details) | Required | | +| `gap_policy` | The policy to apply when gaps are found in the data (see [Dealing with gaps in the data](/reference/data-analysis/aggregations/pipeline.md#gap-policy) for more details) | Optional | `skip` | +| `format` | [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for theoutput value. If specified, the formatted value is returned in the aggregation’s`value_as_string` property | Optional | `null` | + + +## First Order Derivative [_first_order_derivative] + +The following snippet calculates the derivative of the total monthly `sales`: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + }, + "sales_deriv": { + "derivative": { + "buckets_path": "sales" <1> + } + } + } + } + } +} +``` + +1. `buckets_path` instructs this derivative aggregation to use the output of the `sales` aggregation for the derivative + + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "sales": { + "value": 550.0 + } <1> + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "sales": { + "value": 60.0 + }, + "sales_deriv": { + "value": -490.0 <2> + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, <3> + "sales": { + "value": 375.0 + }, + "sales_deriv": { + "value": 315.0 + } + } + ] + } + } +} +``` + +1. No derivative for the first bucket since we need at least 2 data points to calculate the derivative +2. Derivative value units are implicitly defined by the `sales` aggregation and the parent histogram so in this case the units would be $/month assuming the `price` field has units of $. +3. The number of documents in the bucket are represented by the `doc_count` + + + +## Second Order Derivative [_second_order_derivative] + +A second order derivative can be calculated by chaining the derivative pipeline aggregation onto the result of another derivative pipeline aggregation as in the following example which will calculate both the first and the second order derivative of the total monthly sales: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + }, + "sales_deriv": { + "derivative": { + "buckets_path": "sales" + } + }, + "sales_2nd_deriv": { + "derivative": { + "buckets_path": "sales_deriv" <1> + } + } + } + } + } +} +``` + +1. 
`buckets_path` for the second derivative points to the name of the first derivative + + +And the following may be the response: + +```console-result +{ + "took": 50, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "sales": { + "value": 550.0 + } <1> + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "sales": { + "value": 60.0 + }, + "sales_deriv": { + "value": -490.0 + } <1> + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "sales": { + "value": 375.0 + }, + "sales_deriv": { + "value": 315.0 + }, + "sales_2nd_deriv": { + "value": 805.0 + } + } + ] + } + } +} +``` + +1. No second derivative for the first two buckets since we need at least 2 data points from the first derivative to calculate the second derivative + + + +## Units [_units] + +The derivative aggregation allows the units of the derivative values to be specified. This returns an extra field in the response `normalized_value` which reports the derivative value in the desired x-axis units. In the below example we calculate the derivative of the total sales per month but ask for the derivative of the sales as in the units of sales per day: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + }, + "sales_deriv": { + "derivative": { + "buckets_path": "sales", + "unit": "day" <1> + } + } + } + } + } +} +``` + +1. `unit` specifies what unit to use for the x-axis of the derivative calculation + + +And the following may be the response: + +```console-result +{ + "took": 50, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "sales": { + "value": 550.0 + } <1> + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "sales": { + "value": 60.0 + }, + "sales_deriv": { + "value": -490.0, <1> + "normalized_value": -15.806451612903226 <2> + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "sales": { + "value": 375.0 + }, + "sales_deriv": { + "value": 315.0, + "normalized_value": 11.25 + } + } + ] + } + } +} +``` + +1. `value` is reported in the original units of *per month* +2. 
`normalized_value` is reported in the desired units of *per day*
+
+
+
diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-extended-stats-bucket-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-extended-stats-bucket-aggregation.md
new file mode 100644
index 0000000000000..57f8cd6e86ab5
--- /dev/null
+++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-extended-stats-bucket-aggregation.md
@@ -0,0 +1,130 @@
+---
+navigation_title: "Extended stats bucket"
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-extended-stats-bucket-aggregation.html
+---
+
+# Extended stats bucket aggregation [search-aggregations-pipeline-extended-stats-bucket-aggregation]
+
+
+A sibling pipeline aggregation which calculates a variety of stats across all buckets of a specified metric in a sibling aggregation. The specified metric must be numeric and the sibling aggregation must be a multi-bucket aggregation.
+
+This aggregation provides a few more statistics (sum of squares, standard deviation, etc.) compared to the `stats_bucket` aggregation.
+
+## Syntax [_syntax_15]
+
+An `extended_stats_bucket` aggregation looks like this in isolation:
+
+```js
+{
+  "extended_stats_bucket": {
+    "buckets_path": "the_sum"
+  }
+}
+```
+
+$$$extended-stats-bucket-params$$$
+
+| Parameter Name | Description | Required | Default Value |
+| --- | --- | --- | --- |
+| `buckets_path` | The path to the buckets we wish to calculate stats for (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details) | Required | |
+| `gap_policy` | The policy to apply when gaps are found in the data (see [Dealing with gaps in the data](/reference/data-analysis/aggregations/pipeline.md#gap-policy) for more details) | Optional | `skip` |
+| `format` | [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for the output value. If specified, the formatted value is returned in the aggregation’s `value_as_string` property | Optional | `null` |
+| `sigma` | The number of standard deviations above/below the mean to display | Optional | 2 |
+
+The following snippet calculates the extended stats for the monthly `sales` buckets:
+
+```console
+POST /sales/_search
+{
+  "size": 0,
+  "aggs": {
+    "sales_per_month": {
+      "date_histogram": {
+        "field": "date",
+        "calendar_interval": "month"
+      },
+      "aggs": {
+        "sales": {
+          "sum": {
+            "field": "price"
+          }
+        }
+      }
+    },
+    "stats_monthly_sales": {
+      "extended_stats_bucket": {
+        "buckets_path": "sales_per_month>sales" <1>
+      }
+    }
+  }
+}
+```
+
+1. `buckets_path` instructs this `extended_stats_bucket` aggregation that we want to calculate stats for the `sales` aggregation in the `sales_per_month` date histogram.
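+
+As a quick sanity check, the headline statistics in the response below can be reproduced by hand from the three monthly sums (550, 60 and 375): `sum_of_squares` = 550² + 60² + 375² = 446725, `avg` = 985 / 3 ≈ 328.33, population `variance` = 446725 / 3 − 328.33² ≈ 41105.56, `std_deviation` ≈ 202.75, and with the default `sigma` of `2` the bounds are 328.33 ± 2 × 202.75, i.e. roughly −77.16 to 733.82.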
+ + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "sales": { + "value": 550.0 + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "sales": { + "value": 60.0 + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "sales": { + "value": 375.0 + } + } + ] + }, + "stats_monthly_sales": { + "count": 3, + "min": 60.0, + "max": 550.0, + "avg": 328.3333333333333, + "sum": 985.0, + "sum_of_squares": 446725.0, + "variance": 41105.55555555556, + "variance_population": 41105.55555555556, + "variance_sampling": 61658.33333333334, + "std_deviation": 202.74505063146563, + "std_deviation_population": 202.74505063146563, + "std_deviation_sampling": 248.3109609609156, + "std_deviation_bounds": { + "upper": 733.8234345962646, + "lower": -77.15676792959795, + "upper_population" : 733.8234345962646, + "lower_population" : -77.15676792959795, + "upper_sampling" : 824.9552552551645, + "lower_sampling" : -168.28858858849787 + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-inference-bucket-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-inference-bucket-aggregation.md new file mode 100644 index 0000000000000..28cae9b986f58 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-inference-bucket-aggregation.md @@ -0,0 +1,166 @@ +--- +navigation_title: "{{infer-cap}} bucket" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-inference-bucket-aggregation.html +--- + +# {{infer-cap}} bucket aggregation [search-aggregations-pipeline-inference-bucket-aggregation] + + +A parent pipeline aggregation which loads a pre-trained model and performs {{infer}} on the collated result fields from the parent bucket aggregation. + +To use the {{infer}} bucket aggregation, you need to have the same security privileges that are required for using the [get trained models API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-get-trained-models). + +## Syntax [inference-bucket-agg-syntax] + +A `inference` aggregation looks like this in isolation: + +```js +{ + "inference": { + "model_id": "a_model_for_inference", <1> + "inference_config": { <2> + "regression_config": { + "num_top_feature_importance_values": 2 + } + }, + "buckets_path": { + "avg_cost": "avg_agg", <3> + "max_cost": "max_agg" + } + } +} +``` + +1. The unique identifier or alias for the trained model. +2. The optional inference config which overrides the model’s default settings +3. Map the value of `avg_agg` to the model’s input field `avg_cost` + + +$$$inference-bucket-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `model_id` | The ID or alias for the trained model. | Required | - | +| `inference_config` | Contains the inference type and its options. 
There are two types: [`regression`](#inference-agg-regression-opt) and [`classification`](#inference-agg-classification-opt) | Optional | - | +| `buckets_path` | Defines the paths to the input aggregations and maps the aggregation names to the field names expected by the model.See [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details | Required | - | + + +## Configuration options for {{infer}} models [_configuration_options_for_infer_models] + +The `inference_config` setting is optional and usually isn’t required as the pre-trained models come equipped with sensible defaults. In the context of aggregations some options can be overridden for each of the two types of model. + + +#### Configuration options for {{regression}} models [inference-agg-regression-opt] + +`num_top_feature_importance_values` +: (Optional, integer) Specifies the maximum number of [{{feat-imp}}](docs-content://explore-analyze/machine-learning/data-frame-analytics/ml-feature-importance.md) values per document. By default, it is zero and no {{feat-imp}} calculation occurs. + + +#### Configuration options for {{classification}} models [inference-agg-classification-opt] + +`num_top_classes` +: (Optional, integer) Specifies the number of top class predictions to return. Defaults to 0. + +`num_top_feature_importance_values` +: (Optional, integer) Specifies the maximum number of [{{feat-imp}}](docs-content://explore-analyze/machine-learning/data-frame-analytics/ml-feature-importance.md) values per document. Defaults to 0 which means no {{feat-imp}} calculation occurs. + +`prediction_field_type` +: (Optional, string) Specifies the type of the predicted field to write. Valid values are: `string`, `number`, `boolean`. When `boolean` is provided `1.0` is transformed to `true` and `0.0` to `false`. + + +## Example [inference-bucket-agg-example] + +The following snippet aggregates a web log by `client_ip` and extracts a number of features via metric and bucket sub-aggregations as input to the {{infer}} aggregation configured with a model trained to identify suspicious client IPs: + +```console +GET kibana_sample_data_logs/_search +{ + "size": 0, + "aggs": { + "client_ip": { <1> + "composite": { + "sources": [ + { + "client_ip": { + "terms": { + "field": "clientip" + } + } + } + ] + }, + "aggs": { <2> + "url_dc": { + "cardinality": { + "field": "url.keyword" + } + }, + "bytes_sum": { + "sum": { + "field": "bytes" + } + }, + "geo_src_dc": { + "cardinality": { + "field": "geo.src" + } + }, + "geo_dest_dc": { + "cardinality": { + "field": "geo.dest" + } + }, + "responses_total": { + "value_count": { + "field": "timestamp" + } + }, + "success": { + "filter": { + "term": { + "response": "200" + } + } + }, + "error404": { + "filter": { + "term": { + "response": "404" + } + } + }, + "error503": { + "filter": { + "term": { + "response": "503" + } + } + }, + "malicious_client_ip": { <3> + "inference": { + "model_id": "malicious_clients_model", + "buckets_path": { + "response_count": "responses_total", + "url_dc": "url_dc", + "bytes_sum": "bytes_sum", + "geo_src_dc": "geo_src_dc", + "geo_dest_dc": "geo_dest_dc", + "success": "success._count", + "error404": "error404._count", + "error503": "error503._count" + } + } + } + } + } + } +} +``` + +1. A composite bucket aggregation that aggregates the data by `client_ip`. +2. A series of metrics and bucket sub-aggregations. +3. 
{{infer-cap}} bucket aggregation that specifies the trained model and maps the aggregation names to the model’s input fields. + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-max-bucket-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-max-bucket-aggregation.md new file mode 100644 index 0000000000000..fc066c009060f --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-max-bucket-aggregation.md @@ -0,0 +1,112 @@ +--- +navigation_title: "Max bucket" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-max-bucket-aggregation.html +--- + +# Max bucket aggregation [search-aggregations-pipeline-max-bucket-aggregation] + + +A sibling pipeline aggregation which identifies the bucket(s) with the maximum value of a specified metric in a sibling aggregation and outputs both the value and the key(s) of the bucket(s). The specified metric must be numeric and the sibling aggregation must be a multi-bucket aggregation. + +## Syntax [_syntax_16] + +A `max_bucket` aggregation looks like this in isolation: + +```js +{ + "max_bucket": { + "buckets_path": "the_sum" + } +} +``` + +$$$max-bucket-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `buckets_path` | The path to the buckets we wish to find the maximum for (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details) | Required | | +| `gap_policy` | The policy to apply when gaps are found in the data (see [Dealing with gaps in the data](/reference/data-analysis/aggregations/pipeline.md#gap-policy) for more details) | Optional | `skip` | +| `format` | [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for theoutput value. If specified, the formatted value is returned in the aggregation’s`value_as_string` property | Optional | `null` | + +The following snippet calculates the maximum of the total monthly `sales`: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + } + } + }, + "max_monthly_sales": { + "max_bucket": { + "buckets_path": "sales_per_month>sales" <1> + } + } + } +} +``` + +1. `buckets_path` instructs this max_bucket aggregation that we want the maximum value of the `sales` aggregation in the `sales_per_month` date histogram. + + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "sales": { + "value": 550.0 + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "sales": { + "value": 60.0 + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "sales": { + "value": 375.0 + } + } + ] + }, + "max_monthly_sales": { + "keys": ["2015/01/01 00:00:00"], <1> + "value": 550.0 + } + } +} +``` + +1. 
`keys` is an array of strings since the maximum value may be present in multiple buckets + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-min-bucket-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-min-bucket-aggregation.md new file mode 100644 index 0000000000000..6c7129ca37e23 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-min-bucket-aggregation.md @@ -0,0 +1,112 @@ +--- +navigation_title: "Min bucket" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-min-bucket-aggregation.html +--- + +# Min bucket aggregation [search-aggregations-pipeline-min-bucket-aggregation] + + +A sibling pipeline aggregation which identifies the bucket(s) with the minimum value of a specified metric in a sibling aggregation and outputs both the value and the key(s) of the bucket(s). The specified metric must be numeric and the sibling aggregation must be a multi-bucket aggregation. + +## Syntax [_syntax_17] + +A `min_bucket` aggregation looks like this in isolation: + +```js +{ + "min_bucket": { + "buckets_path": "the_sum" + } +} +``` + +$$$min-bucket-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `buckets_path` | The path to the buckets we wish to find the minimum for (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details) | Required | | +| `gap_policy` | The policy to apply when gaps are found in the data (see [Dealing with gaps in the data](/reference/data-analysis/aggregations/pipeline.md#gap-policy) for more details) | Optional | `skip` | +| `format` | [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for theoutput value. If specified, the formatted value is returned in the aggregation’s`value_as_string` property | Optional | `null` | + +The following snippet calculates the minimum of the total monthly `sales`: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + } + } + }, + "min_monthly_sales": { + "min_bucket": { + "buckets_path": "sales_per_month>sales" <1> + } + } + } +} +``` + +1. `buckets_path` instructs this min_bucket aggregation that we want the minimum value of the `sales` aggregation in the `sales_per_month` date histogram. + + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "sales": { + "value": 550.0 + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "sales": { + "value": 60.0 + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "sales": { + "value": 375.0 + } + } + ] + }, + "min_monthly_sales": { + "keys": ["2015/02/01 00:00:00"], <1> + "value": 60.0 + } + } +} +``` + +1. 
`keys` is an array of strings since the minimum value may be present in multiple buckets + + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-movfn-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-movfn-aggregation.md new file mode 100644 index 0000000000000..a04ee179282a9 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-movfn-aggregation.md @@ -0,0 +1,570 @@ +--- +navigation_title: "Moving function" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-movfn-aggregation.html +--- + +# Moving function aggregation [search-aggregations-pipeline-movfn-aggregation] + + +Given an ordered series of data, the Moving Function aggregation will slide a window across the data and allow the user to specify a custom script that is executed on each window of data. For convenience, a number of common functions are predefined such as min/max, moving averages, etc. + +## Syntax [_syntax_18] + +A `moving_fn` aggregation looks like this in isolation: + +```js +{ + "moving_fn": { + "buckets_path": "the_sum", + "window": 10, + "script": "MovingFunctions.min(values)" + } +} +``` + +$$$moving-fn-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `buckets_path` | Path to the metric of interest (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details | Required | | +| `window` | The size of window to "slide" across the histogram. | Required | | +| `script` | The script that should be executed on each window of data | Required | | +| `gap_policy` | The policy to apply when gaps are found in the data. See [Dealing with gaps in the data](/reference/data-analysis/aggregations/pipeline.md#gap-policy). | Optional | `skip` | +| `shift` | [Shift](#shift-parameter) of window position. | Optional | 0 | + +`moving_fn` aggregations must be embedded inside of a `histogram` or `date_histogram` aggregation. They can be embedded like any other metric aggregation: + +```console +POST /_search +{ + "size": 0, + "aggs": { + "my_date_histo": { <1> + "date_histogram": { + "field": "date", + "calendar_interval": "1M" + }, + "aggs": { + "the_sum": { + "sum": { "field": "price" } <2> + }, + "the_movfn": { + "moving_fn": { + "buckets_path": "the_sum", <3> + "window": 10, + "script": "MovingFunctions.unweightedAvg(values)" + } + } + } + } + } +} +``` + +1. A `date_histogram` named "my_date_histo" is constructed on the "timestamp" field, with one-month intervals +2. A `sum` metric is used to calculate the sum of a field. This could be any numeric metric (sum, min, max, etc) +3. Finally, we specify a `moving_fn` aggregation which uses "the_sum" metric as its input. + + +Moving averages are built by first specifying a `histogram` or `date_histogram` over a field. You can then optionally add numeric metrics, such as a `sum`, inside of that histogram. Finally, the `moving_fn` is embedded inside the histogram. The `buckets_path` parameter is then used to "point" at one of the sibling metrics inside of the histogram (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for a description of the syntax for `buckets_path`. 
+ +An example response from the above aggregation may look like: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "my_date_histo": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "the_sum": { + "value": 550.0 + }, + "the_movfn": { + "value": null + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "the_sum": { + "value": 60.0 + }, + "the_movfn": { + "value": 550.0 + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "the_sum": { + "value": 375.0 + }, + "the_movfn": { + "value": 305.0 + } + } + ] + } + } +} +``` + + +## Custom user scripting [_custom_user_scripting] + +The Moving Function aggregation allows the user to specify any arbitrary script to define custom logic. The script is invoked each time a new window of data is collected. These values are provided to the script in the `values` variable. The script should then perform some kind of calculation and emit a single `double` as the result. Emitting `null` is not permitted, although `NaN` and +/- `Inf` are allowed. + +For example, this script will simply return the first value from the window, or `NaN` if no values are available: + +```console +POST /_search +{ + "size": 0, + "aggs": { + "my_date_histo": { + "date_histogram": { + "field": "date", + "calendar_interval": "1M" + }, + "aggs": { + "the_sum": { + "sum": { "field": "price" } + }, + "the_movavg": { + "moving_fn": { + "buckets_path": "the_sum", + "window": 10, + "script": "return values.length > 0 ? values[0] : Double.NaN" + } + } + } + } + } +} +``` + + +## shift parameter [shift-parameter] + +By default (with `shift = 0`), the window that is offered for calculation is the last `n` values excluding the current bucket. Increasing `shift` by 1 moves starting window position by `1` to the right. + +* To include current bucket to the window, use `shift = 1`. +* For center alignment (`n / 2` values before and after the current bucket), use `shift = window / 2`. +* For right alignment (`n` values after the current bucket), use `shift = window`. + +If either of window edges moves outside the borders of data series, the window shrinks to include available values only. + + +## Pre-built Functions [_pre_built_functions] + +For convenience, a number of functions have been prebuilt and are available inside the `moving_fn` script context: + +* `max()` +* `min()` +* `sum()` +* `stdDev()` +* `unweightedAvg()` +* `linearWeightedAvg()` +* `ewma()` +* `holt()` +* `holtWinters()` + +The functions are available from the `MovingFunctions` namespace. E.g. `MovingFunctions.max()` + +### max Function [_max_function] + +This function accepts a collection of doubles and returns the maximum value in that window. `null` and `NaN` values are ignored; the maximum is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `NaN` is returned as the result. 
+ +$$$max-params$$$ + +| Parameter Name | Description | +| --- | --- | +| `values` | The window of values to find the maximum | + +```console +POST /_search +{ + "size": 0, + "aggs": { + "my_date_histo": { + "date_histogram": { + "field": "date", + "calendar_interval": "1M" + }, + "aggs": { + "the_sum": { + "sum": { "field": "price" } + }, + "the_moving_max": { + "moving_fn": { + "buckets_path": "the_sum", + "window": 10, + "script": "MovingFunctions.max(values)" + } + } + } + } + } +} +``` + + +### min Function [_min_function] + +This function accepts a collection of doubles and returns the minimum value in that window. `null` and `NaN` values are ignored; the minimum is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `NaN` is returned as the result. + +$$$min-params$$$ + +| Parameter Name | Description | +| --- | --- | +| `values` | The window of values to find the minimum | + +```console +POST /_search +{ + "size": 0, + "aggs": { + "my_date_histo": { + "date_histogram": { + "field": "date", + "calendar_interval": "1M" + }, + "aggs": { + "the_sum": { + "sum": { "field": "price" } + }, + "the_moving_min": { + "moving_fn": { + "buckets_path": "the_sum", + "window": 10, + "script": "MovingFunctions.min(values)" + } + } + } + } + } +} +``` + + +### sum Function [_sum_function] + +This function accepts a collection of doubles and returns the sum of the values in that window. `null` and `NaN` values are ignored; the sum is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `0.0` is returned as the result. + +$$$sum-params$$$ + +| Parameter Name | Description | +| --- | --- | +| `values` | The window of values to find the sum of | + +```console +POST /_search +{ + "size": 0, + "aggs": { + "my_date_histo": { + "date_histogram": { + "field": "date", + "calendar_interval": "1M" + }, + "aggs": { + "the_sum": { + "sum": { "field": "price" } + }, + "the_moving_sum": { + "moving_fn": { + "buckets_path": "the_sum", + "window": 10, + "script": "MovingFunctions.sum(values)" + } + } + } + } + } +} +``` + + +### stdDev Function [_stddev_function] + +This function accepts a collection of doubles and average, then returns the standard deviation of the values in that window. `null` and `NaN` values are ignored; the sum is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `0.0` is returned as the result. + +$$$stddev-params$$$ + +| Parameter Name | Description | +| --- | --- | +| `values` | The window of values to find the standard deviation of | +| `avg` | The average of the window | + +```console +POST /_search +{ + "size": 0, + "aggs": { + "my_date_histo": { + "date_histogram": { + "field": "date", + "calendar_interval": "1M" + }, + "aggs": { + "the_sum": { + "sum": { "field": "price" } + }, + "the_moving_sum": { + "moving_fn": { + "buckets_path": "the_sum", + "window": 10, + "script": "MovingFunctions.stdDev(values, MovingFunctions.unweightedAvg(values))" + } + } + } + } + } +} +``` + +The `avg` parameter must be provided to the standard deviation function because different styles of averages can be computed on the window (simple, linearly weighted, etc). The various moving averages that are detailed below can be used to calculate the average for the standard deviation function. + + +### unweightedAvg Function [_unweightedavg_function] + +The `unweightedAvg` function calculates the sum of all values in the window, then divides by the size of the window. 
It is effectively a simple arithmetic mean of the window. The simple moving average does not perform any time-dependent weighting, which means the values from a `simple` moving average tend to "lag" behind the real data. + +`null` and `NaN` values are ignored; the average is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` values. + +$$$unweightedavg-params$$$ + +| Parameter Name | Description | +| --- | --- | +| `values` | The window of values to find the sum of | + +```console +POST /_search +{ + "size": 0, + "aggs": { + "my_date_histo": { + "date_histogram": { + "field": "date", + "calendar_interval": "1M" + }, + "aggs": { + "the_sum": { + "sum": { "field": "price" } + }, + "the_movavg": { + "moving_fn": { + "buckets_path": "the_sum", + "window": 10, + "script": "MovingFunctions.unweightedAvg(values)" + } + } + } + } + } +} +``` + + + +## linearWeightedAvg Function [_linearweightedavg_function] + +The `linearWeightedAvg` function assigns a linear weighting to points in the series, such that "older" datapoints (e.g. those at the beginning of the window) contribute a linearly less amount to the total average. The linear weighting helps reduce the "lag" behind the data’s mean, since older points have less influence. + +If the window is empty, or all values are `null`/`NaN`, `NaN` is returned as the result. + +$$$linearweightedavg-params$$$ + +| Parameter Name | Description | +| --- | --- | +| `values` | The window of values to find the sum of | + +```console +POST /_search +{ + "size": 0, + "aggs": { + "my_date_histo": { + "date_histogram": { + "field": "date", + "calendar_interval": "1M" + }, + "aggs": { + "the_sum": { + "sum": { "field": "price" } + }, + "the_movavg": { + "moving_fn": { + "buckets_path": "the_sum", + "window": 10, + "script": "MovingFunctions.linearWeightedAvg(values)" + } + } + } + } + } +} +``` + + +## ewma Function [_ewma_function] + +The `ewma` function (aka "single-exponential") is similar to the `linearMovAvg` function, except older data-points become exponentially less important, rather than linearly less important. The speed at which the importance decays can be controlled with an `alpha` setting. Small values make the weight decay slowly, which provides greater smoothing and takes into account a larger portion of the window. Larger values make the weight decay quickly, which reduces the impact of older values on the moving average. This tends to make the moving average track the data more closely but with less smoothing. + +`null` and `NaN` values are ignored; the average is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` values. 
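+
+For intuition, the textbook single-exponential recurrence (a sketch of the idea, not quoted from the Elasticsearch implementation) is `s_t = alpha * x_t + (1 - alpha) * s_(t-1)`, which is why larger `alpha` values make the average react faster to the most recent buckets.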
+ +$$$ewma-params$$$ + +| Parameter Name | Description | +| --- | --- | +| `values` | The window of values to find the sum of | +| `alpha` | Exponential decay | + +```console +POST /_search +{ + "size": 0, + "aggs": { + "my_date_histo": { + "date_histogram": { + "field": "date", + "calendar_interval": "1M" + }, + "aggs": { + "the_sum": { + "sum": { "field": "price" } + }, + "the_movavg": { + "moving_fn": { + "buckets_path": "the_sum", + "window": 10, + "script": "MovingFunctions.ewma(values, 0.3)" + } + } + } + } + } +} +``` + + +## holt Function [_holt_function] + +The `holt` function (aka "double exponential") incorporates a second exponential term which tracks the data’s trend. Single exponential does not perform well when the data has an underlying linear trend. The double exponential model calculates two values internally: a "level" and a "trend". + +The level calculation is similar to `ewma`, and is an exponentially weighted view of the data. The difference is that the previously smoothed value is used instead of the raw value, which allows it to stay close to the original series. The trend calculation looks at the difference between the current and last value (e.g. the slope, or trend, of the smoothed data). The trend value is also exponentially weighted. + +Values are produced by multiplying the level and trend components. + +`null` and `NaN` values are ignored; the average is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` values. + +$$$holt-params$$$ + +| Parameter Name | Description | +| --- | --- | +| `values` | The window of values to find the sum of | +| `alpha` | Level decay value | +| `beta` | Trend decay value | + +```console +POST /_search +{ + "size": 0, + "aggs": { + "my_date_histo": { + "date_histogram": { + "field": "date", + "calendar_interval": "1M" + }, + "aggs": { + "the_sum": { + "sum": { "field": "price" } + }, + "the_movavg": { + "moving_fn": { + "buckets_path": "the_sum", + "window": 10, + "script": "MovingFunctions.holt(values, 0.3, 0.1)" + } + } + } + } + } +} +``` + +In practice, the `alpha` value behaves very similarly in `holtMovAvg` as `ewmaMovAvg`: small values produce more smoothing and more lag, while larger values produce closer tracking and less lag. The value of `beta` is often difficult to see. Small values emphasize long-term trends (such as a constant linear trend in the whole series), while larger values emphasize short-term trends. + + +## holtWinters Function [_holtwinters_function] + +The `holtWinters` function (aka "triple exponential") incorporates a third exponential term which tracks the seasonal aspect of your data. This aggregation therefore smooths based on three components: "level", "trend" and "seasonality". + +The level and trend calculation is identical to `holt` The seasonal calculation looks at the difference between the current point, and the point one period earlier. + +Holt-Winters requires a little more handholding than the other moving averages. You need to specify the "periodicity" of your data: e.g. if your data has cyclic trends every 7 days, you would set `period = 7`. Similarly if there was a monthly trend, you would set it to `30`. There is currently no periodicity detection, although that is planned for future enhancements. + +`null` and `NaN` values are ignored; the average is only calculated over the real values. 
$$$holtwinters-params$$$

| Parameter Name | Description |
| --- | --- |
| `values` | The window of values to average |
| `alpha` | Level decay value |
| `beta` | Trend decay value |
| `gamma` | Seasonality decay value |
| `period` | The periodicity of the data |
| `multiplicative` | True if you wish to use multiplicative holt-winters, false to use additive |

```console
POST /_search
{
  "size": 0,
  "aggs": {
    "my_date_histo": {
      "date_histogram": {
        "field": "date",
        "calendar_interval": "1M"
      },
      "aggs": {
        "the_sum": {
          "sum": { "field": "price" }
        },
        "the_movavg": {
          "moving_fn": {
            "buckets_path": "the_sum",
            "window": 10,
            "script": "if (values.length > 5*2) {MovingFunctions.holtWinters(values, 0.3, 0.1, 0.1, 5, false)}"
          }
        }
      }
    }
  }
}
```

::::{warning}
Multiplicative Holt-Winters works by dividing each data point by the seasonal value. This is problematic if any of your data is zero, or if there are gaps in the data (since this results in a divide-by-zero). To combat this, multiplicative Holt-Winters pads all values by a very small amount (1*10^-10) so that all values are non-zero. This affects the result, but only minimally. If your data is non-zero, or you prefer to see `NaN` when zeros are encountered, you can disable this behavior with `pad: false`.
::::


### "Cold Start" [_cold_start]

Unfortunately, due to the nature of Holt-Winters, it requires two periods of data to "bootstrap" the algorithm. This means that your `window` must always be **at least** twice the size of your period. An exception will be thrown if it isn’t. It also means that Holt-Winters will not emit a value for the first `2 * period` buckets; the current algorithm does not backcast.

You’ll notice in the above example we have an `if ()` statement checking the size of values. This is checking to make sure we have two periods’ worth of data (`5 * 2`, where 5 is the period specified in the `holtWinters` function) before calling the Holt-Winters function.


diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-moving-percentiles-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-moving-percentiles-aggregation.md
new file mode 100644
index 0000000000000..1cab3ef611c66
--- /dev/null
+++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-moving-percentiles-aggregation.md
@@ -0,0 +1,150 @@
---
navigation_title: "Moving percentiles"
mapped_pages:
  - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-moving-percentiles-aggregation.html
---

# Moving percentiles aggregation [search-aggregations-pipeline-moving-percentiles-aggregation]


Given an ordered series of [percentiles](/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md), the Moving Percentiles aggregation will slide a window across those percentiles and allow the user to compute the cumulative percentile.

This is conceptually very similar to the [Moving Function](/reference/data-analysis/aggregations/search-aggregations-pipeline-movfn-aggregation.md) pipeline aggregation, except it works on the percentile sketches instead of the actual bucket values.
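Before getting into the syntax, a rough mental model may help: the aggregation effectively merges the percentile sketches of the buckets that fall inside the window and reads the requested percentiles off the merged result. The Python sketch below illustrates that idea using plain lists of sampled values as stand-ins for the TDigest/HDR sketches; it is an analogy for illustration, not the actual implementation.

```python
def moving_percentiles(per_bucket_samples, window, percents):
    """Conceptual sketch: plain value lists stand in for percentile sketches."""
    results = []
    for i in range(len(per_bucket_samples)):
        # with the default shift = 0, bucket i sees the previous `window` buckets
        merged = sorted(v for bucket in per_bucket_samples[max(0, i - window):i] for v in bucket)
        if not merged:
            results.append(None)
            continue
        results.append({p: merged[round(p / 100 * (len(merged) - 1))] for p in percents})
    return results

buckets = [[3.0, 9.0, 1.0], [4.0, 4.0, 8.0], [2.0, 7.0]]
print(moving_percentiles(buckets, window=2, percents=[1.0, 99.0]))
# [None, {1.0: 1.0, 99.0: 9.0}, {1.0: 1.0, 99.0: 9.0}]
```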
+ +## Syntax [_syntax_19] + +A `moving_percentiles` aggregation looks like this in isolation: + +```js +{ + "moving_percentiles": { + "buckets_path": "the_percentile", + "window": 10 + } +} +``` + +$$$moving-percentiles-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `buckets_path` | Path to the percentile of interest (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details | Required | | +| `window` | The size of window to "slide" across the histogram. | Required | | +| `shift` | [Shift](/reference/data-analysis/aggregations/search-aggregations-pipeline-movfn-aggregation.md#shift-parameter) of window position. | Optional | 0 | + +`moving_percentiles` aggregations must be embedded inside of a `histogram` or `date_histogram` aggregation. They can be embedded like any other metric aggregation: + +```console +POST /_search +{ + "size": 0, + "aggs": { + "my_date_histo": { <1> + "date_histogram": { + "field": "date", + "calendar_interval": "1M" + }, + "aggs": { + "the_percentile": { <2> + "percentiles": { + "field": "price", + "percents": [ 1.0, 99.0 ] + } + }, + "the_movperc": { + "moving_percentiles": { + "buckets_path": "the_percentile", <3> + "window": 10 + } + } + } + } + } +} +``` + +1. A `date_histogram` named "my_date_histo" is constructed on the "timestamp" field, with one-day intervals +2. A `percentile` metric is used to calculate the percentiles of a field. +3. Finally, we specify a `moving_percentiles` aggregation which uses "the_percentile" sketch as its input. + + +Moving percentiles are built by first specifying a `histogram` or `date_histogram` over a field. You then add a percentile metric inside of that histogram. Finally, the `moving_percentiles` is embedded inside the histogram. The `buckets_path` parameter is then used to "point" at the percentiles aggregation inside of the histogram (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for a description of the syntax for `buckets_path`). + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "my_date_histo": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "the_percentile": { + "values": { + "1.0": 151.0, + "99.0": 200.0 + } + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "the_percentile": { + "values": { + "1.0": 10.4, + "99.0": 49.6 + } + }, + "the_movperc": { + "values": { + "1.0": 151.0, + "99.0": 200.0 + } + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "the_percentile": { + "values": { + "1.0": 175.25, + "99.0": 199.75 + } + }, + "the_movperc": { + "values": { + "1.0": 11.6, + "99.0": 200.0 + } + } + } + ] + } + } +} +``` + +The output format of the `moving_percentiles` aggregation is inherited from the format of the referenced [`percentiles`](/reference/data-analysis/aggregations/search-aggregations-metrics-percentile-aggregation.md) aggregation. + +Moving percentiles pipeline aggregations always run with `skip` gap policy. + + +## shift parameter [moving-percentiles-shift-parameter] + +By default (with `shift = 0`), the window that is offered for calculation is the last `n` values excluding the current bucket. Increasing `shift` by 1 moves starting window position by `1` to the right. 
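In terms of bucket indices, the window selection can be summarized with the small sketch below (a simplified sketch, assuming the half-open window `[i - window + shift, i + shift)` clamped to the available buckets); the bullet points that follow list the common alignments.

```python
def window_bounds(i, window, shift=0, n_buckets=None):
    """Half-open [start, end) range of bucket indices whose values feed bucket i."""
    start, end = i - window + shift, i + shift
    start, end = max(start, 0), max(end, 0)
    if n_buckets is not None:                      # the window shrinks at the edges of the series
        start, end = min(start, n_buckets), min(end, n_buckets)
    return start, end

# window = 3
print(window_bounds(i=10, window=3, shift=0))      # (7, 10)  excludes the current bucket
print(window_bounds(i=10, window=3, shift=1))      # (8, 11)  includes the current bucket
print(window_bounds(i=10, window=3, shift=3))      # (10, 13) window starts at the current bucket
```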
+ +* To include current bucket to the window, use `shift = 1`. +* For center alignment (`n / 2` values before and after the current bucket), use `shift = window / 2`. +* For right alignment (`n` values after the current bucket), use `shift = window`. + +If either of window edges moves outside the borders of data series, the window shrinks to include available values only. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-normalize-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-normalize-aggregation.md new file mode 100644 index 0000000000000..441defd0e2384 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-normalize-aggregation.md @@ -0,0 +1,197 @@ +--- +navigation_title: "Normalize" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-normalize-aggregation.html +--- + +# Normalize aggregation [search-aggregations-pipeline-normalize-aggregation] + + +A parent pipeline aggregation which calculates the specific normalized/rescaled value for a specific bucket value. Values that cannot be normalized, will be skipped using the [skip gap policy](/reference/data-analysis/aggregations/pipeline.md#gap-policy). + +## Syntax [_syntax_20] + +A `normalize` aggregation looks like this in isolation: + +```js +{ + "normalize": { + "buckets_path": "normalized", + "method": "percent_of_sum" + } +} +``` + +$$$normalize_pipeline-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `buckets_path` | The path to the buckets we wish to normalize (see [`buckets_path` syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details) | Required | | +| `method` | The specific [method](#normalize_pipeline-method) to apply | Required | | +| `format` | [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for theoutput value. If specified, the formatted value is returned in the aggregation’s`value_as_string` property | Optional | `null` | + + +## Methods [_methods] + +$$$normalize_pipeline-method$$$ +The Normalize Aggregation supports multiple methods to transform the bucket values. Each method definition will use the following original set of bucket values as examples: `[5, 5, 10, 50, 10, 20]`. + +*rescale_0_1* +: This method rescales the data such that the minimum number is zero, and the maximum number is 1, with the rest normalized linearly in-between. + + ``` + x' = (x - min_x) / (max_x - min_x) + ``` + ``` + [0, 0, .1111, 1, .1111, .3333] + ``` + + +*rescale_0_100* +: This method rescales the data such that the minimum number is zero, and the maximum number is 100, with the rest normalized linearly in-between. + + ``` + x' = 100 * (x - min_x) / (max_x - min_x) + ``` + ``` + [0, 0, 11.11, 100, 11.11, 33.33] + ``` + + +*percent_of_sum* +: This method normalizes each value so that it represents a percentage of the total sum it attributes to. + + ``` + x' = x / sum_x + ``` + ``` + [5%, 5%, 10%, 50%, 10%, 20%] + ``` + + +*mean* +: This method normalizes such that each value is normalized by how much it differs from the average. 
+ + ``` + x' = (x - mean_x) / (max_x - min_x) + ``` + ``` + [4.63, 4.63, 9.63, 49.63, 9.63, 9.63, 19.63] + ``` + + +*z-score* +: This method normalizes such that each value represents how far it is from the mean relative to the standard deviation + + ``` + x' = (x - mean_x) / stdev_x + ``` + ``` + [-0.68, -0.68, -0.39, 1.94, -0.39, 0.19] + ``` + + +*softmax* +: This method normalizes such that each value is exponentiated and relative to the sum of the exponents of the original values. + + ``` + x' = e^x / sum_e_x + ``` + ``` + [2.862E-20, 2.862E-20, 4.248E-18, 0.999, 9.357E-14, 4.248E-18] + ``` + + + +## Example [_example_8] + +The following snippet calculates the percent of total sales for each month: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + }, + "percent_of_total_sales": { + "normalize": { + "buckets_path": "sales", <1> + "method": "percent_of_sum", <2> + "format": "00.00%" <3> + } + } + } + } + } +} +``` + +1. `buckets_path` instructs this normalize aggregation to use the output of the `sales` aggregation for rescaling +2. `method` sets which rescaling to apply. In this case, `percent_of_sum` will calculate the sales value as a percent of all sales in the parent bucket +3. `format` influences how to format the metric as a string using Java’s `DecimalFormat` pattern. In this case, multiplying by 100 and adding a *%* + + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "sales": { + "value": 550.0 + }, + "percent_of_total_sales": { + "value": 0.5583756345177665, + "value_as_string": "55.84%" + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "sales": { + "value": 60.0 + }, + "percent_of_total_sales": { + "value": 0.06091370558375635, + "value_as_string": "06.09%" + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "sales": { + "value": 375.0 + }, + "percent_of_total_sales": { + "value": 0.38071065989847713, + "value_as_string": "38.07%" + } + } + ] + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-percentiles-bucket-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-percentiles-bucket-aggregation.md new file mode 100644 index 0000000000000..e5a556f63366e --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-percentiles-bucket-aggregation.md @@ -0,0 +1,123 @@ +--- +navigation_title: "Percentiles bucket" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-percentiles-bucket-aggregation.html +--- + +# Percentiles bucket aggregation [search-aggregations-pipeline-percentiles-bucket-aggregation] + + +A sibling pipeline aggregation which calculates percentiles across all bucket of a specified metric in a sibling aggregation. The specified metric must be numeric and the sibling aggregation must be a multi-bucket aggregation. 
+ +## Syntax [_syntax_21] + +A `percentiles_bucket` aggregation looks like this in isolation: + +```js +{ + "percentiles_bucket": { + "buckets_path": "the_sum" + } +} +``` + +$$$percentiles-bucket-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `buckets_path` | The path to the buckets we wish to find the percentiles for (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details) | Required | | +| `gap_policy` | The policy to apply when gaps are found in the data (see [Dealing with gaps in the data](/reference/data-analysis/aggregations/pipeline.md#gap-policy) for more details) | Optional | `skip` | +| `format` | [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for theoutput value. If specified, the formatted value is returned in the aggregation’s`value_as_string` property | Optional | `null` | +| `percents` | The list of percentiles to calculate | Optional | `[ 1, 5, 25, 50, 75, 95, 99 ]` | +| `keyed` | Flag which returns the range as an hash instead of an array of key-value pairs | Optional | `true` | + +The following snippet calculates the percentiles for the total monthly `sales` buckets: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + } + } + }, + "percentiles_monthly_sales": { + "percentiles_bucket": { + "buckets_path": "sales_per_month>sales", <1> + "percents": [ 25.0, 50.0, 75.0 ] <2> + } + } + } +} +``` + +1. `buckets_path` instructs this percentiles_bucket aggregation that we want to calculate percentiles for the `sales` aggregation in the `sales_per_month` date histogram. +2. `percents` specifies which percentiles we wish to calculate, in this case, the 25th, 50th and 75th percentiles. + + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "sales": { + "value": 550.0 + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "sales": { + "value": 60.0 + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "sales": { + "value": 375.0 + } + } + ] + }, + "percentiles_monthly_sales": { + "values" : { + "25.0": 375.0, + "50.0": 375.0, + "75.0": 550.0 + } + } + } +} +``` + + +## Percentiles_bucket implementation [_percentiles_bucket_implementation] + +The percentiles are calculated exactly and is not an approximation (unlike the Percentiles Metric). This means the implementation maintains an in-memory, sorted list of your data to compute the percentiles, before discarding the data. You may run into memory pressure issues if you attempt to calculate percentiles over many millions of data-points in a single `percentiles_bucket`. + +The Percentile Bucket returns the nearest input data point to the requested percentile, rounding indices toward positive infinity; it does not interpolate between data points. For example, if there are eight data points and you request the `50%th` percentile, it will return the `4th` item because `ROUND_UP(.50 * (8-1))` is `4`. 
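The selection rule described above can be written down in a few lines. The sketch below is illustrative only, but it reproduces the rounding behaviour of the worked example (eight data points, 50th percentile).

```python
import math

def percentiles_bucket(values, percent):
    """Nearest-rank percentile: sort the values, round the fractional index up
    toward positive infinity, and return the value at that index.
    `percent` is expressed as a fraction, e.g. 0.50 for the 50th percentile."""
    data = sorted(values)
    index = math.ceil(percent * (len(data) - 1))
    return data[index]

points = [10, 20, 30, 40, 50, 60, 70, 80]      # eight data points
print(percentiles_bucket(points, 0.50))        # ROUND_UP(0.50 * (8 - 1)) = 4 -> 50
```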
+ + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-serialdiff-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-serialdiff-aggregation.md new file mode 100644 index 0000000000000..276712413b224 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-serialdiff-aggregation.md @@ -0,0 +1,93 @@ +--- +navigation_title: "Serial differencing" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-serialdiff-aggregation.html +--- + +# Serial differencing aggregation [search-aggregations-pipeline-serialdiff-aggregation] + + +Serial differencing is a technique where values in a time series are subtracted from itself at different time lags or periods. For example, the datapoint f(x) = f(xt) - f(xt-n), where n is the period being used. + +A period of 1 is equivalent to a derivative with no time normalization: it is simply the change from one point to the next. Single periods are useful for removing constant, linear trends. + +Single periods are also useful for transforming data into a stationary series. In this example, the Dow Jones is plotted over ~250 days. The raw data is not stationary, which would make it difficult to use with some techniques. + +By calculating the first-difference, we de-trend the data (e.g. remove a constant, linear trend). We can see that the data becomes a stationary series (e.g. the first difference is randomly distributed around zero, and doesn’t seem to exhibit any pattern/behavior). The transformation reveals that the dataset is following a random-walk; the value is the previous value +/- a random amount. This insight allows selection of further tools for analysis. + +:::{image} ../../../images/dow.png +:alt: dow +:title: Dow Jones plotted and made stationary with first-differencing +:name: serialdiff_dow +::: + +Larger periods can be used to remove seasonal / cyclic behavior. In this example, a population of lemmings was synthetically generated with a sine wave + constant linear trend + random noise. The sine wave has a period of 30 days. + +The first-difference removes the constant trend, leaving just a sine wave. The 30th-difference is then applied to the first-difference to remove the cyclic behavior, leaving a stationary series which is amenable to other analysis. + +:::{image} ../../../images/lemmings.png +:alt: lemmings +:title: Lemmings data plotted made stationary with 1st and 30th difference +:name: serialdiff_lemmings +::: + +## Syntax [_syntax_22] + +A `serial_diff` aggregation looks like this in isolation: + +```js +{ + "serial_diff": { + "buckets_path": "the_sum", + "lag": 7 + } +} +``` + +$$$serial-diff-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `buckets_path` | Path to the metric of interest (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details | Required | | +| `lag` | The historical bucket to subtract from the current value. E.g. a lag of 7 will subtract the current value from the value 7 buckets ago. Must be a positive, non-zero integer | Optional | `1` | +| `gap_policy` | Determines what should happen when a gap in the data is encountered. | Optional | `insert_zeros` | +| `format` | [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for theoutput value. 
If specified, the formatted value is returned in the aggregation’s`value_as_string` property | Optional | `null` | + +`serial_diff` aggregations must be embedded inside of a `histogram` or `date_histogram` aggregation: + +```console +POST /_search +{ + "size": 0, + "aggs": { + "my_date_histo": { <1> + "date_histogram": { + "field": "timestamp", + "calendar_interval": "day" + }, + "aggs": { + "the_sum": { + "sum": { + "field": "lemmings" <2> + } + }, + "thirtieth_difference": { + "serial_diff": { <3> + "buckets_path": "the_sum", + "lag" : 30 + } + } + } + } + } +} +``` + +1. A `date_histogram` named "my_date_histo" is constructed on the "timestamp" field, with one-day intervals +2. A `sum` metric is used to calculate the sum of a field. This could be any metric (sum, min, max, etc) +3. Finally, we specify a `serial_diff` aggregation which uses "the_sum" metric as its input. + + +Serial differences are built by first specifying a `histogram` or `date_histogram` over a field. You can then optionally add normal metrics, such as a `sum`, inside of that histogram. Finally, the `serial_diff` is embedded inside the histogram. The `buckets_path` parameter is then used to "point" at one of the sibling metrics inside of the histogram (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for a description of the syntax for `buckets_path`. + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-stats-bucket-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-stats-bucket-aggregation.md new file mode 100644 index 0000000000000..c65bbfec150d6 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-stats-bucket-aggregation.md @@ -0,0 +1,112 @@ +--- +navigation_title: "Stats bucket" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-stats-bucket-aggregation.html +--- + +# Stats bucket aggregation [search-aggregations-pipeline-stats-bucket-aggregation] + + +A sibling pipeline aggregation which calculates a variety of stats across all bucket of a specified metric in a sibling aggregation. The specified metric must be numeric and the sibling aggregation must be a multi-bucket aggregation. + +## Syntax [_syntax_23] + +A `stats_bucket` aggregation looks like this in isolation: + +```js +{ + "stats_bucket": { + "buckets_path": "the_sum" + } +} +``` + +$$$stats-bucket-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `buckets_path` | The path to the buckets we wish to calculate stats for (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details) | Required | | +| `gap_policy` | The policy to apply when gaps are found in the data (see [Dealing with gaps in the data](/reference/data-analysis/aggregations/pipeline.md#gap-policy) for more details) | Optional | `skip` | +| `format` | [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for theoutput value. 
If specified, the formatted value is returned in the aggregation’s`value_as_string` property | Optional | `null` | + +The following snippet calculates the stats for monthly `sales`: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + } + } + }, + "stats_monthly_sales": { + "stats_bucket": { + "buckets_path": "sales_per_month>sales" <1> + } + } + } +} +``` + +1. `bucket_paths` instructs this `stats_bucket` aggregation that we want the calculate stats for the `sales` aggregation in the `sales_per_month` date histogram. + + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "sales": { + "value": 550.0 + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "sales": { + "value": 60.0 + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "sales": { + "value": 375.0 + } + } + ] + }, + "stats_monthly_sales": { + "count": 3, + "min": 60.0, + "max": 550.0, + "avg": 328.3333333333333, + "sum": 985.0 + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-sum-bucket-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-sum-bucket-aggregation.md new file mode 100644 index 0000000000000..0ddbc1c87985d --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-pipeline-sum-bucket-aggregation.md @@ -0,0 +1,108 @@ +--- +navigation_title: "Sum bucket" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-sum-bucket-aggregation.html +--- + +# Sum bucket aggregation [search-aggregations-pipeline-sum-bucket-aggregation] + + +A sibling pipeline aggregation which calculates the sum across all buckets of a specified metric in a sibling aggregation. The specified metric must be numeric and the sibling aggregation must be a multi-bucket aggregation. + +## Syntax [_syntax_24] + +A `sum_bucket` aggregation looks like this in isolation: + +```js +{ + "sum_bucket": { + "buckets_path": "the_sum" + } +} +``` + +$$$sum-bucket-params$$$ + +| Parameter Name | Description | Required | Default Value | +| --- | --- | --- | --- | +| `buckets_path` | The path to the buckets we wish to find the sum for (see [`buckets_path` Syntax](/reference/data-analysis/aggregations/pipeline.md#buckets-path-syntax) for more details) | Required | | +| `gap_policy` | The policy to apply when gaps are found in the data (see [Dealing with gaps in the data](/reference/data-analysis/aggregations/pipeline.md#gap-policy) for more details) | Optional | `skip` | +| `format` | [DecimalFormat pattern](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/text/DecimalFormat.md) for theoutput value. If specified, the formatted value is returned in the aggregation’s`value_as_string` property. 
| Optional | `null` | + +The following snippet calculates the sum of all the total monthly `sales` buckets: + +```console +POST /sales/_search +{ + "size": 0, + "aggs": { + "sales_per_month": { + "date_histogram": { + "field": "date", + "calendar_interval": "month" + }, + "aggs": { + "sales": { + "sum": { + "field": "price" + } + } + } + }, + "sum_monthly_sales": { + "sum_bucket": { + "buckets_path": "sales_per_month>sales" <1> + } + } + } +} +``` + +1. `buckets_path` instructs this sum_bucket aggregation that we want the sum of the `sales` aggregation in the `sales_per_month` date histogram. + + +And the following may be the response: + +```console-result +{ + "took": 11, + "timed_out": false, + "_shards": ..., + "hits": ..., + "aggregations": { + "sales_per_month": { + "buckets": [ + { + "key_as_string": "2015/01/01 00:00:00", + "key": 1420070400000, + "doc_count": 3, + "sales": { + "value": 550.0 + } + }, + { + "key_as_string": "2015/02/01 00:00:00", + "key": 1422748800000, + "doc_count": 2, + "sales": { + "value": 60.0 + } + }, + { + "key_as_string": "2015/03/01 00:00:00", + "key": 1425168000000, + "doc_count": 2, + "sales": { + "value": 375.0 + } + } + ] + }, + "sum_monthly_sales": { + "value": 985.0 + } + } +} +``` + + diff --git a/docs/reference/data-analysis/aggregations/search-aggregations-random-sampler-aggregation.md b/docs/reference/data-analysis/aggregations/search-aggregations-random-sampler-aggregation.md new file mode 100644 index 0000000000000..0c68bb40b1ce8 --- /dev/null +++ b/docs/reference/data-analysis/aggregations/search-aggregations-random-sampler-aggregation.md @@ -0,0 +1,98 @@ +--- +navigation_title: "Random sampler" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-random-sampler-aggregation.html +--- + +# Random sampler aggregation [search-aggregations-random-sampler-aggregation] + + +::::{warning} +This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features. +:::: + + +The `random_sampler` aggregation is a single bucket aggregation that randomly includes documents in the aggregated results. Sampling provides significant speed improvement at the cost of accuracy. + +The sampling is accomplished by providing a random subset of the entire set of documents in a shard. If a filter query is provided in the search request, that filter is applied over the sampled subset. Consequently, if a filter is restrictive, very few documents might match; therefore, the statistics might not be as accurate. + +::::{note} +This aggregation is not to be confused with the [sampler aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-sampler-aggregation.md). The sampler aggregation is not over all documents; rather, it samples the first `n` documents matched by the query. +:::: + + +```console +GET kibana_sample_data_ecommerce/_search?size=0&track_total_hits=false +{ + "aggregations": { + "sampling": { + "random_sampler": { + "probability": 0.1 + }, + "aggs": { + "price_percentiles": { + "percentiles": { + "field": "taxful_total_price" + } + } + } + } + } +} +``` + +## Top-level parameters for random_sampler [random-sampler-top-level-params] + +`probability` +: (Required, float) The probability that a document will be included in the aggregated data. Must be greater than 0, less than `0.5`, or exactly `1`. 
The lower the probability, the fewer documents are matched. + +`seed` +: (Optional, integer) The seed to generate the random sampling of documents. When a seed is provided, the random subset of documents is the same between calls. + + +## How does the sampling work? [random-sampler-inner-workings] + +The aggregation is a random sample of all the documents in the index. In other words, the sampling is over the background set of documents. If a query is provided, a document is returned if it is matched by the query and if the document is in the random sampling. The sampling is not done over the matched documents. + +Consider the set of documents `[1, 2, 3, 4, 5]`. Your query matches `[1, 3, 5]` and the randomly sampled set is `[2, 4, 5]`. In this case, the document returned would be `[5]`. + +This type of sampling provides almost linear improvement in query latency in relation to the amount by which sampling reduces the document set size: + +:::{image} ../../../images/random-sampler-agg-graph.png +:alt: Graph of the median speedup by sampling factor +::: + +This graph is typical of the speed up for the majority of aggregations for a test data set of 63 million documents. The exact constants will depend on the data set size and the number of shards, but the form of the relationship between speed up and probability holds widely. For certain aggregations, the speed up may not be as dramatic. These aggregations have some constant overhead unrelated to the number of documents seen. Even for those aggregations, the speed improvements can be significant. + +The sample set is generated by skipping documents using a geometric distribution (`(1-p)^(k-1)*p`) with success probability being the provided `probability` (`p` in the distribution equation). The values returned from the distribution indicate how many documents to skip in the background. This is equivalent to selecting documents uniformly at random. It follows that the expected number of failures before a success is `(1-p)/p`. For example, with the `"probability": 0.01`, the expected number of failures (or average number of documents skipped) would be `99` with a variance of `9900`. Consequently, if you had only 80 documents in your index or matched by your filter, you would most likely receive no results. + +:::{image} ../../../images/relative-error-vs-doc-count.png +:alt: Graph of the relative error by sampling probability and doc count +::: + +In the above image `p` is the probability provided to the aggregation, and `n` is the number of documents matched by whatever query is provided. You can see the impact of outliers on `sum` and `mean`, but when many documents are still matched at higher sampling rates, the relative error is still low. + +::::{note} +This represents the result of aggregations against a typical positively skewed APM data set which also has outliers in the upper tail. The linear dependence of the relative error on the sample size is found to hold widely, but the slope depends on the variation in the quantity being aggregated. As such, the variance in your own data may cause relative error rates to increase or decrease at a different rate. +:::: + + + +## Random sampler consistency [random-sampler-consistency] + +For a given `probability` and `seed`, the random sampler aggregation is consistent when sampling unchanged data from the same shard. However, this is background random sampling if a particular document is included in the sampled set or not is dependent on current number of segments. + +Meaning, replica vs. 
primary shards could return different values as different particular documents are sampled. + +If the shard changes in via doc addition, update, deletion, or segment merging, the particular documents sampled could change, and thus the resulting statistics could change. + +The resulting statistics used from the random sampler aggregation are approximate and should be treated as such. + + +## Random sampling special cases [random-sampler-special-cases] + +All counts returned by the random sampler aggregation are scaled to ease visualizations and calculations. For example, when randomly sampling a [date histogram aggregation](/reference/data-analysis/aggregations/search-aggregations-bucket-datehistogram-aggregation.md) every `doc_count` value for every bucket is scaled by the inverse of the random_sampler `probability` value. So, if `doc_count` for a bucket is `10,000` with `probability: 0.1`, the actual number of documents aggregated is `1,000`. + +An exception to this is [cardinality aggregation](/reference/data-analysis/aggregations/search-aggregations-metrics-cardinality-aggregation.md). Unique item counts are not suitable for automatic scaling. When interpreting the cardinality count, compare it to the number of sampled docs provided in the top level `doc_count` within the random_sampler aggregation. It gives you an idea of unique values as a percentage of total values. It may not reflect, however, the exact number of unique values for the given field. + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-apostrophe-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-apostrophe-tokenfilter.md new file mode 100644 index 0000000000000..a8bcf715beb3b --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-apostrophe-tokenfilter.md @@ -0,0 +1,54 @@ +--- +navigation_title: "Apostrophe" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-apostrophe-tokenfilter.html +--- + +# Apostrophe token filter [analysis-apostrophe-tokenfilter] + + +Strips all characters after an apostrophe, including the apostrophe itself. + +This filter is included in {{es}}'s built-in [Turkish language analyzer](/reference/data-analysis/text-analysis/analysis-lang-analyzer.md#turkish-analyzer). It uses Lucene’s [ApostropheFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/tr/ApostropheFilter.md), which was built for the Turkish language. + +## Example [analysis-apostrophe-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request demonstrates how the apostrophe token filter works. + +```console +GET /_analyze +{ + "tokenizer" : "standard", + "filter" : ["apostrophe"], + "text" : "Istanbul'a veya Istanbul'dan" +} +``` + +The filter produces the following tokens: + +```text +[ Istanbul, veya, Istanbul ] +``` + + +## Add to an analyzer [analysis-apostrophe-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the apostrophe token filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). 
+ +```console +PUT /apostrophe_example +{ + "settings": { + "analysis": { + "analyzer": { + "standard_apostrophe": { + "tokenizer": "standard", + "filter": [ "apostrophe" ] + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-asciifolding-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-asciifolding-tokenfilter.md new file mode 100644 index 0000000000000..9c98f31708cbc --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-asciifolding-tokenfilter.md @@ -0,0 +1,89 @@ +--- +navigation_title: "ASCII folding" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-asciifolding-tokenfilter.html +--- + +# ASCII folding token filter [analysis-asciifolding-tokenfilter] + + +Converts alphabetic, numeric, and symbolic characters that are not in the Basic Latin Unicode block (first 127 ASCII characters) to their ASCII equivalent, if one exists. For example, the filter changes `à` to `a`. + +This filter uses Lucene’s [ASCIIFoldingFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.md). + +## Example [analysis-asciifolding-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `asciifolding` filter to drop the diacritical marks in `açaí à la carte`: + +```console +GET /_analyze +{ + "tokenizer" : "standard", + "filter" : ["asciifolding"], + "text" : "açaí à la carte" +} +``` + +The filter produces the following tokens: + +```text +[ acai, a, la, carte ] +``` + + +## Add to an analyzer [analysis-asciifolding-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `asciifolding` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT /asciifold_example +{ + "settings": { + "analysis": { + "analyzer": { + "standard_asciifolding": { + "tokenizer": "standard", + "filter": [ "asciifolding" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-asciifolding-tokenfilter-configure-parms] + +`preserve_original` +: (Optional, Boolean) If `true`, emit both original tokens and folded tokens. Defaults to `false`. + + +## Customize [analysis-asciifolding-tokenfilter-customize] + +To customize the `asciifolding` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. 
+ +For example, the following request creates a custom `asciifolding` filter with `preserve_original` set to true: + +```console +PUT /asciifold_example +{ + "settings": { + "analysis": { + "analyzer": { + "standard_asciifolding": { + "tokenizer": "standard", + "filter": [ "my_ascii_folding" ] + } + }, + "filter": { + "my_ascii_folding": { + "type": "asciifolding", + "preserve_original": true + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-chargroup-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-chargroup-tokenizer.md new file mode 100644 index 0000000000000..a04fd37c6c458 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-chargroup-tokenizer.md @@ -0,0 +1,77 @@ +--- +navigation_title: "Character group" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-chargroup-tokenizer.html +--- + +# Character group tokenizer [analysis-chargroup-tokenizer] + + +The `char_group` tokenizer breaks text into terms whenever it encounters a character which is in a defined set. It is mostly useful for cases where a simple custom tokenization is desired, and the overhead of use of the [`pattern` tokenizer](/reference/data-analysis/text-analysis/analysis-pattern-tokenizer.md) is not acceptable. + + +## Configuration [_configuration_8] + +The `char_group` tokenizer accepts one parameter: + +`tokenize_on_chars` +: A list containing a list of characters to tokenize the string on. Whenever a character from this list is encountered, a new token is started. This accepts either single characters like e.g. `-`, or character groups: `whitespace`, `letter`, `digit`, `punctuation`, `symbol`. + +`max_token_length` +: The maximum token length. If a token is seen that exceeds this length then it is split at `max_token_length` intervals. Defaults to `255`. + + +## Example output [_example_output_7] + +```console +POST _analyze +{ + "tokenizer": { + "type": "char_group", + "tokenize_on_chars": [ + "whitespace", + "-", + "\n" + ] + }, + "text": "The QUICK brown-fox" +} +``` + +returns + +```console-result +{ + "tokens": [ + { + "token": "The", + "start_offset": 0, + "end_offset": 3, + "type": "word", + "position": 0 + }, + { + "token": "QUICK", + "start_offset": 4, + "end_offset": 9, + "type": "word", + "position": 1 + }, + { + "token": "brown", + "start_offset": 10, + "end_offset": 15, + "type": "word", + "position": 2 + }, + { + "token": "fox", + "start_offset": 16, + "end_offset": 19, + "type": "word", + "position": 3 + } + ] +} +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-cjk-bigram-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-cjk-bigram-tokenfilter.md new file mode 100644 index 0000000000000..c3b5e8a3dde77 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-cjk-bigram-tokenfilter.md @@ -0,0 +1,103 @@ +--- +navigation_title: "CJK bigram" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-cjk-bigram-tokenfilter.html +--- + +# CJK bigram token filter [analysis-cjk-bigram-tokenfilter] + + +Forms [bigrams](https://en.wikipedia.org/wiki/Bigram) out of CJK (Chinese, Japanese, and Korean) tokens. + +This filter is included in {{es}}'s built-in [CJK language analyzer](/reference/data-analysis/text-analysis/analysis-lang-analyzer.md#cjk-analyzer). It uses Lucene’s [CJKBigramFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/cjk/CJKBigramFilter.md). 
+ +## Example [analysis-cjk-bigram-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request demonstrates how the CJK bigram token filter works. + +```console +GET /_analyze +{ + "tokenizer" : "standard", + "filter" : ["cjk_bigram"], + "text" : "東京都は、日本の首都であり" +} +``` + +The filter produces the following tokens: + +```text +[ 東京, 京都, 都は, 日本, 本の, の首, 首都, 都で, であ, あり ] +``` + + +## Add to an analyzer [analysis-cjk-bigram-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the CJK bigram token filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT /cjk_bigram_example +{ + "settings": { + "analysis": { + "analyzer": { + "standard_cjk_bigram": { + "tokenizer": "standard", + "filter": [ "cjk_bigram" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-cjk-bigram-tokenfilter-configure-parms] + +`ignored_scripts` +: (Optional, array of character scripts) Array of character scripts for which to disable bigrams. Possible values: + +* `han` +* `hangul` +* `hiragana` +* `katakana` + +All non-CJK input is passed through unmodified. + + +`output_unigrams` +: (Optional, Boolean) If `true`, emit tokens in both bigram and [unigram](https://en.wikipedia.org/wiki/N-gram) form. If `false`, a CJK character is output in unigram form when it has no adjacent characters. Defaults to `false`. + + +## Customize [analysis-cjk-bigram-tokenfilter-customize] + +To customize the CJK bigram token filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +```console +PUT /cjk_bigram_example +{ + "settings": { + "analysis": { + "analyzer": { + "han_bigrams": { + "tokenizer": "standard", + "filter": [ "han_bigrams_filter" ] + } + }, + "filter": { + "han_bigrams_filter": { + "type": "cjk_bigram", + "ignored_scripts": [ + "hangul", + "hiragana", + "katakana" + ], + "output_unigrams": true + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-cjk-width-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-cjk-width-tokenfilter.md new file mode 100644 index 0000000000000..ed9616255885a --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-cjk-width-tokenfilter.md @@ -0,0 +1,60 @@ +--- +navigation_title: "CJK width" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-cjk-width-tokenfilter.html +--- + +# CJK width token filter [analysis-cjk-width-tokenfilter] + + +Normalizes width differences in CJK (Chinese, Japanese, and Korean) characters as follows: + +* Folds full-width ASCII character variants into the equivalent basic Latin characters +* Folds half-width Katakana character variants into the equivalent Kana characters + +This filter is included in {{es}}'s built-in [CJK language analyzer](/reference/data-analysis/text-analysis/analysis-lang-analyzer.md#cjk-analyzer). It uses Lucene’s [CJKWidthFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/cjk/CJKWidthFilter.md). + +::::{note} +This token filter can be viewed as a subset of NFKC/NFKD Unicode normalization. See the [`analysis-icu` plugin](/reference/elasticsearch-plugins/analysis-icu-normalization-charfilter.md) for full normalization support. 
+:::: + + +## Example [analysis-cjk-width-tokenfilter-analyze-ex] + +```console +GET /_analyze +{ + "tokenizer" : "standard", + "filter" : ["cjk_width"], + "text" : "シーサイドライナー" +} +``` + +The filter produces the following token: + +```text +シーサイドライナー +``` + + +## Add to an analyzer [analysis-cjk-width-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the CJK width token filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT /cjk_width_example +{ + "settings": { + "analysis": { + "analyzer": { + "standard_cjk_width": { + "tokenizer": "standard", + "filter": [ "cjk_width" ] + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-classic-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-classic-tokenfilter.md new file mode 100644 index 0000000000000..403030d012d1b --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-classic-tokenfilter.md @@ -0,0 +1,54 @@ +--- +navigation_title: "Classic" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-classic-tokenfilter.html +--- + +# Classic token filter [analysis-classic-tokenfilter] + + +Performs optional post-processing of terms generated by the [`classic` tokenizer](/reference/data-analysis/text-analysis/analysis-classic-tokenizer.md). + +This filter removes the english possessive (`'s`) from the end of words and removes dots from acronyms. It uses Lucene’s [ClassicFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/standard/ClassicFilter.md). + +## Example [analysis-classic-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request demonstrates how the classic token filter works. + +```console +GET /_analyze +{ + "tokenizer" : "classic", + "filter" : ["classic"], + "text" : "The 2 Q.U.I.C.K. Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The filter produces the following tokens: + +```text +[ The, 2, QUICK, Brown, Foxes, jumped, over, the, lazy, dog, bone ] +``` + + +## Add to an analyzer [analysis-classic-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the classic token filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT /classic_example +{ + "settings": { + "analysis": { + "analyzer": { + "classic_analyzer": { + "tokenizer": "classic", + "filter": [ "classic" ] + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-classic-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-classic-tokenizer.md new file mode 100644 index 0000000000000..771b277d337f3 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-classic-tokenizer.md @@ -0,0 +1,78 @@ +--- +navigation_title: "Classic" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-classic-tokenizer.html +--- + +# Classic tokenizer [analysis-classic-tokenizer] + + +The `classic` tokenizer is a grammar based tokenizer that is good for English language documents. 
This tokenizer has heuristics for special treatment of acronyms, company names, email addresses, and internet host names. However, these rules don’t always work, and the tokenizer doesn’t work well for most languages other than English: + +* It splits words at most punctuation characters, removing punctuation. However, a dot that’s not followed by whitespace is considered part of a token. +* It splits words at hyphens, unless there’s a number in the token, in which case the whole token is interpreted as a product number and is not split. +* It recognizes email addresses and internet hostnames as one token. + + +## Example output [_example_output_8] + +```console +POST _analyze +{ + "tokenizer": "classic", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The above sentence would produce the following terms: + +```text +[ The, 2, QUICK, Brown, Foxes, jumped, over, the, lazy, dog's, bone ] +``` + + +## Configuration [_configuration_9] + +The `classic` tokenizer accepts the following parameters: + +`max_token_length` +: The maximum token length. If a token is seen that exceeds this length then it is split at `max_token_length` intervals. Defaults to `255`. + + +## Example configuration [_example_configuration_6] + +In this example, we configure the `classic` tokenizer to have a `max_token_length` of 5 (for demonstration purposes): + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "my_tokenizer" + } + }, + "tokenizer": { + "my_tokenizer": { + "type": "classic", + "max_token_length": 5 + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_analyzer", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The above example produces the following terms: + +```text +[ The, 2, QUICK, Brown, Foxes, jumpe, d, over, the, lazy, dog's, bone ] +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-common-grams-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-common-grams-tokenfilter.md new file mode 100644 index 0000000000000..01e52be2411a6 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-common-grams-tokenfilter.md @@ -0,0 +1,131 @@ +--- +navigation_title: "Common grams" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-common-grams-tokenfilter.html +--- + +# Common grams token filter [analysis-common-grams-tokenfilter] + + +Generates [bigrams](https://en.wikipedia.org/wiki/Bigram) for a specified set of common words. + +For example, you can specify `is` and `the` as common words. This filter then converts the tokens `[the, quick, fox, is, brown]` to `[the, the_quick, quick, fox, fox_is, is, is_brown, brown]`. + +You can use the `common_grams` filter in place of the [stop token filter](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) when you don’t want to completely ignore common words. + +This filter uses Lucene’s [CommonGramsFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/commongrams/CommonGramsFilter.md). 
+ +## Example [analysis-common-grams-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request creates bigrams for `is` and `the`: + +```console +GET /_analyze +{ + "tokenizer" : "whitespace", + "filter" : [ + { + "type": "common_grams", + "common_words": ["is", "the"] + } + ], + "text" : "the quick fox is brown" +} +``` + +The filter produces the following tokens: + +```text +[ the, the_quick, quick, fox, fox_is, is, is_brown, brown ] +``` + + +## Add to an analyzer [analysis-common-grams-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `common_grams` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md): + +```console +PUT /common_grams_example +{ + "settings": { + "analysis": { + "analyzer": { + "index_grams": { + "tokenizer": "whitespace", + "filter": [ "common_grams" ] + } + }, + "filter": { + "common_grams": { + "type": "common_grams", + "common_words": [ "a", "is", "the" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-common-grams-tokenfilter-configure-parms] + +`common_words` +: (Required*, array of strings) A list of tokens. The filter generates bigrams for these tokens. + +Either this or the `common_words_path` parameter is required. + + +`common_words_path` +: (Required*, string) Path to a file containing a list of tokens. The filter generates bigrams for these tokens. + +This path must be absolute or relative to the `config` location. The file must be UTF-8 encoded. Each token in the file must be separated by a line break. + +Either this or the `common_words` parameter is required. + + +`ignore_case` +: (Optional, Boolean) If `true`, matches for common words matching are case-insensitive. Defaults to `false`. + +`query_mode` +: (Optional, Boolean) If `true`, the filter excludes the following tokens from the output: + +* Unigrams for common words +* Unigrams for terms followed by common words + +Defaults to `false`. We recommend enabling this parameter for [search analyzers](/reference/elasticsearch/mapping-reference/search-analyzer.md). + +For example, you can enable this parameter and specify `is` and `the` as common words. This filter converts the tokens `[the, quick, fox, is, brown]` to `[the_quick, quick, fox_is, is_brown,]`. + + + +## Customize [analysis-common-grams-tokenfilter-customize] + +To customize the `common_grams` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. 
+ +For example, the following request creates a custom `common_grams` filter with `ignore_case` and `query_mode` set to `true`: + +```console +PUT /common_grams_example +{ + "settings": { + "analysis": { + "analyzer": { + "index_grams": { + "tokenizer": "whitespace", + "filter": [ "common_grams_query" ] + } + }, + "filter": { + "common_grams_query": { + "type": "common_grams", + "common_words": [ "a", "is", "the" ], + "ignore_case": true, + "query_mode": true + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-condition-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-condition-tokenfilter.md new file mode 100644 index 0000000000000..039d1c6dc2665 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-condition-tokenfilter.md @@ -0,0 +1,88 @@ +--- +navigation_title: "Conditional" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-condition-tokenfilter.html +--- + +# Conditional token filter [analysis-condition-tokenfilter] + + +Applies a set of token filters to tokens that match conditions in a provided predicate script. + +This filter uses Lucene’s [ConditionalTokenFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/ConditionalTokenFilter.md). + +## Example [analysis-condition-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `condition` filter to match tokens with fewer than 5 characters in `THE QUICK BROWN FOX`. It then applies the [`lowercase`](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md) filter to those matching tokens, converting them to lowercase. + +```console +GET /_analyze +{ + "tokenizer": "standard", + "filter": [ + { + "type": "condition", + "filter": [ "lowercase" ], + "script": { + "source": "token.getTerm().length() < 5" + } + } + ], + "text": "THE QUICK BROWN FOX" +} +``` + +The filter produces the following tokens: + +```text +[ the, QUICK, BROWN, fox ] +``` + + +## Configurable parameters [analysis-condition-tokenfilter-configure-parms] + +`filter` +: (Required, array of token filters) Array of token filters. If a token matches the predicate script in the `script` parameter, these filters are applied to the token in the order provided. + +These filters can include custom token filters defined in the index mapping. + + +`script` +: (Required, [script object](docs-content://explore-analyze/scripting/modules-scripting-using.md)) Predicate script used to apply token filters. If a token matches this script, the filters in the `filter` parameter are applied to the token. + +For valid parameters, see [*How to write scripts*](docs-content://explore-analyze/scripting/modules-scripting-using.md). Only inline scripts are supported. Painless scripts are executed in the [analysis predicate context](/reference/scripting-languages/painless/painless-analysis-predicate-context.md) and require a `token` property. + + + +## Customize and add to an analyzer [analysis-condition-tokenfilter-customize] + +To customize the `condition` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. 
+ +For example, the following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses a custom `condition` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). The custom `condition` filter matches the first token in a stream. It then reverses that matching token using the [`reverse`](/reference/data-analysis/text-analysis/analysis-reverse-tokenfilter.md) filter. + +```console +PUT /palindrome_list +{ + "settings": { + "analysis": { + "analyzer": { + "whitespace_reverse_first_token": { + "tokenizer": "whitespace", + "filter": [ "reverse_first_token" ] + } + }, + "filter": { + "reverse_first_token": { + "type": "condition", + "filter": [ "reverse" ], + "script": { + "source": "token.getPosition() === 0" + } + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-decimal-digit-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-decimal-digit-tokenfilter.md new file mode 100644 index 0000000000000..9cdead1bf23dc --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-decimal-digit-tokenfilter.md @@ -0,0 +1,54 @@ +--- +navigation_title: "Decimal digit" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-decimal-digit-tokenfilter.html +--- + +# Decimal digit token filter [analysis-decimal-digit-tokenfilter] + + +Converts all digits in the Unicode `Decimal_Number` General Category to `0-9`. For example, the filter changes the Bengali numeral `৩` to `3`. + +This filter uses Lucene’s [DecimalDigitFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/core/DecimalDigitFilter.md). + +## Example [analysis-decimal-digit-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `decimal_digit` filter to convert Devanagari numerals to `0-9`: + +```console +GET /_analyze +{ + "tokenizer" : "whitespace", + "filter" : ["decimal_digit"], + "text" : "१-one two-२ ३" +} +``` + +The filter produces the following tokens: + +```text +[ 1-one, two-2, 3] +``` + + +## Add to an analyzer [analysis-decimal-digit-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `decimal_digit` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT /decimal_digit_example +{ + "settings": { + "analysis": { + "analyzer": { + "whitespace_decimal_digit": { + "tokenizer": "whitespace", + "filter": [ "decimal_digit" ] + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-delimited-payload-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-delimited-payload-tokenfilter.md new file mode 100644 index 0000000000000..f705206e046a6 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-delimited-payload-tokenfilter.md @@ -0,0 +1,248 @@ +--- +navigation_title: "Delimited payload" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-delimited-payload-tokenfilter.html +--- + +# Delimited payload token filter [analysis-delimited-payload-tokenfilter] + + +::::{warning} +The older name `delimited_payload_filter` is deprecated and should not be used with new indices. 
Use `delimited_payload` instead. + +:::: + + +Separates a token stream into tokens and payloads based on a specified delimiter. + +For example, you can use the `delimited_payload` filter with a `|` delimiter to split `the|1 quick|2 fox|3` into the tokens `the`, `quick`, and `fox` with respective payloads of `1`, `2`, and `3`. + +This filter uses Lucene’s [DelimitedPayloadTokenFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.md). + +::::{admonition} Payloads +:class: note + +A payload is user-defined binary data associated with a token position and stored as base64-encoded bytes. + +{{es}} does not store token payloads by default. To store payloads, you must: + +* Set the [`term_vector`](/reference/elasticsearch/mapping-reference/term-vector.md) mapping parameter to `with_positions_payloads` or `with_positions_offsets_payloads` for any field storing payloads. +* Use an index analyzer that includes the `delimited_payload` filter + +You can view stored payloads using the [term vectors API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-termvectors). + +:::: + + +## Example [analysis-delimited-payload-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `delimited_payload` filter with the default `|` delimiter to split `the|0 brown|10 fox|5 is|0 quick|10` into tokens and payloads. + +```console +GET _analyze +{ + "tokenizer": "whitespace", + "filter": ["delimited_payload"], + "text": "the|0 brown|10 fox|5 is|0 quick|10" +} +``` + +The filter produces the following tokens: + +```text +[ the, brown, fox, is, quick ] +``` + +Note that the analyze API does not return stored payloads. For an example that includes returned payloads, see [Return stored payloads](#analysis-delimited-payload-tokenfilter-return-stored-payloads). + + +## Add to an analyzer [analysis-delimited-payload-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `delimited-payload` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT delimited_payload +{ + "settings": { + "analysis": { + "analyzer": { + "whitespace_delimited_payload": { + "tokenizer": "whitespace", + "filter": [ "delimited_payload" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-delimited-payload-tokenfilter-configure-parms] + +`delimiter` +: (Optional, string) Character used to separate tokens from payloads. Defaults to `|`. + +`encoding` +: (Optional, string) Data type for the stored payload. Valid values are: + +`float` +: (Default) Float + +`identity` +: Characters + +`int` +: Integer + + + +## Customize and add to an analyzer [analysis-delimited-payload-tokenfilter-customize] + +To customize the `delimited_payload` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses a custom `delimited_payload` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). The custom `delimited_payload` filter uses the `+` delimiter to separate tokens from payloads. 
Payloads are encoded as integers. + +```console +PUT delimited_payload_example +{ + "settings": { + "analysis": { + "analyzer": { + "whitespace_plus_delimited": { + "tokenizer": "whitespace", + "filter": [ "plus_delimited" ] + } + }, + "filter": { + "plus_delimited": { + "type": "delimited_payload", + "delimiter": "+", + "encoding": "int" + } + } + } + } +} +``` + + +## Return stored payloads [analysis-delimited-payload-tokenfilter-return-stored-payloads] + +Use the [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) to create an index that: + +* Includes a field that stores term vectors with payloads. +* Uses a [custom index analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md) with the `delimited_payload` filter. + +```console +PUT text_payloads +{ + "mappings": { + "properties": { + "text": { + "type": "text", + "term_vector": "with_positions_payloads", + "analyzer": "payload_delimiter" + } + } + }, + "settings": { + "analysis": { + "analyzer": { + "payload_delimiter": { + "tokenizer": "whitespace", + "filter": [ "delimited_payload" ] + } + } + } + } +} +``` + +Add a document containing payloads to the index. + +```console +POST text_payloads/_doc/1 +{ + "text": "the|0 brown|3 fox|4 is|0 quick|10" +} +``` + +Use the [term vectors API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-termvectors) to return the document’s tokens and base64-encoded payloads. + +```console +GET text_payloads/_termvectors/1 +{ + "fields": [ "text" ], + "payloads": true +} +``` + +The API returns the following response: + +```console-result +{ + "_index": "text_payloads", + "_id": "1", + "_version": 1, + "found": true, + "took": 8, + "term_vectors": { + "text": { + "field_statistics": { + "sum_doc_freq": 5, + "doc_count": 1, + "sum_ttf": 5 + }, + "terms": { + "brown": { + "term_freq": 1, + "tokens": [ + { + "position": 1, + "payload": "QEAAAA==" + } + ] + }, + "fox": { + "term_freq": 1, + "tokens": [ + { + "position": 2, + "payload": "QIAAAA==" + } + ] + }, + "is": { + "term_freq": 1, + "tokens": [ + { + "position": 3, + "payload": "AAAAAA==" + } + ] + }, + "quick": { + "term_freq": 1, + "tokens": [ + { + "position": 4, + "payload": "QSAAAA==" + } + ] + }, + "the": { + "term_freq": 1, + "tokens": [ + { + "position": 0, + "payload": "AAAAAA==" + } + ] + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-dict-decomp-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-dict-decomp-tokenfilter.md new file mode 100644 index 0000000000000..ae047572ea04f --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-dict-decomp-tokenfilter.md @@ -0,0 +1,105 @@ +--- +navigation_title: "Dictionary decompounder" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-dict-decomp-tokenfilter.html +--- + +# Dictionary decompounder token filter [analysis-dict-decomp-tokenfilter] + + +::::{note} +In most cases, we recommend using the faster [`hyphenation_decompounder`](/reference/data-analysis/text-analysis/analysis-hyp-decomp-tokenfilter.md) token filter in place of this filter. However, you can use the `dictionary_decompounder` filter to check the quality of a word list before implementing it in the `hyphenation_decompounder` filter. + +:::: + + +Uses a specified list of words and a brute force approach to find subwords in compound words. If found, these subwords are included in the token output. 
+ +This filter uses Lucene’s [DictionaryCompoundWordTokenFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.md), which was built for Germanic languages. + +## Example [analysis-dict-decomp-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `dictionary_decompounder` filter to find subwords in `Donaudampfschiff`. The filter then checks these subwords against the specified list of words: `Donau`, `dampf`, `meer`, and `schiff`. + +```console +GET _analyze +{ + "tokenizer": "standard", + "filter": [ + { + "type": "dictionary_decompounder", + "word_list": ["Donau", "dampf", "meer", "schiff"] + } + ], + "text": "Donaudampfschiff" +} +``` + +The filter produces the following tokens: + +```text +[ Donaudampfschiff, Donau, dampf, schiff ] +``` + + +## Configurable parameters [analysis-dict-decomp-tokenfilter-configure-parms] + +`word_list` +: (Required*, array of strings) A list of subwords to look for in the token stream. If found, the subword is included in the token output. + +Either this parameter or `word_list_path` must be specified. + + +`word_list_path` +: (Required*, string) Path to a file that contains a list of subwords to find in the token stream. If found, the subword is included in the token output. + +This path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each token in the file must be separated by a line break. + +Either this parameter or `word_list` must be specified. + + +`max_subword_size` +: (Optional, integer) Maximum subword character length. Longer subword tokens are excluded from the output. Defaults to `15`. + +`min_subword_size` +: (Optional, integer) Minimum subword character length. Shorter subword tokens are excluded from the output. Defaults to `2`. + +`min_word_size` +: (Optional, integer) Minimum word character length. Shorter word tokens are excluded from the output. Defaults to `5`. + +`only_longest_match` +: (Optional, Boolean) If `true`, only include the longest matching subword. Defaults to `false`. + + +## Customize and add to an analyzer [analysis-dict-decomp-tokenfilter-customize] + +To customize the `dictionary_decompounder` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses a custom `dictionary_decompounder` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +The custom `dictionary_decompounder` filter find subwords in the `analysis/example_word_list.txt` file. Subwords longer than 22 characters are excluded from the token output. 
+ +```console +PUT dictionary_decompound_example +{ + "settings": { + "analysis": { + "analyzer": { + "standard_dictionary_decompound": { + "tokenizer": "standard", + "filter": [ "22_char_dictionary_decompound" ] + } + }, + "filter": { + "22_char_dictionary_decompound": { + "type": "dictionary_decompounder", + "word_list_path": "analysis/example_word_list.txt", + "max_subword_size": 22 + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-edgengram-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-edgengram-tokenfilter.md new file mode 100644 index 0000000000000..4f7ec5357f0a2 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-edgengram-tokenfilter.md @@ -0,0 +1,131 @@ +--- +navigation_title: "Edge n-gram" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-edgengram-tokenfilter.html +--- + +# Edge n-gram token filter [analysis-edgengram-tokenfilter] + + +Forms an [n-gram](https://en.wikipedia.org/wiki/N-gram) of a specified length from the beginning of a token. + +For example, you can use the `edge_ngram` token filter to change `quick` to `qu`. + +When not customized, the filter creates 1-character edge n-grams by default. + +This filter uses Lucene’s [EdgeNGramTokenFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.md). + +::::{note} +The `edge_ngram` filter is similar to the [`ngram` token filter](/reference/data-analysis/text-analysis/analysis-ngram-tokenizer.md). However, the `edge_ngram` only outputs n-grams that start at the beginning of a token. These edge n-grams are useful for [search-as-you-type](/reference/elasticsearch/mapping-reference/search-as-you-type.md) queries. + +:::: + + +## Example [analysis-edgengram-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `edge_ngram` filter to convert `the quick brown fox jumps` to 1-character and 2-character edge n-grams: + +```console +GET _analyze +{ + "tokenizer": "standard", + "filter": [ + { "type": "edge_ngram", + "min_gram": 1, + "max_gram": 2 + } + ], + "text": "the quick brown fox jumps" +} +``` + +The filter produces the following tokens: + +```text +[ t, th, q, qu, b, br, f, fo, j, ju ] +``` + + +## Add to an analyzer [analysis-edgengram-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `edge_ngram` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT edge_ngram_example +{ + "settings": { + "analysis": { + "analyzer": { + "standard_edge_ngram": { + "tokenizer": "standard", + "filter": [ "edge_ngram" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-edgengram-tokenfilter-configure-parms] + +`max_gram` +: (Optional, integer) Maximum character length of a gram. For custom token filters, defaults to `2`. For the built-in `edge_ngram` filter, defaults to `1`. + +See [Limitations of the `max_gram` parameter](#analysis-edgengram-tokenfilter-max-gram-limits). + + +`min_gram` +: (Optional, integer) Minimum character length of a gram. Defaults to `1`. + +`preserve_original` +: (Optional, Boolean) Emits original token when set to `true`. Defaults to `false`. + +`side` +: (Optional, string) [8.16.0]. 
Indicates whether to truncate tokens from the `front` or `back`. Defaults to `front`. + + + +## Customize [analysis-edgengram-tokenfilter-customize] + +To customize the `edge_ngram` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following request creates a custom `edge_ngram` filter that forms n-grams between 3-5 characters. + +```console +PUT edge_ngram_custom_example +{ + "settings": { + "analysis": { + "analyzer": { + "default": { + "tokenizer": "whitespace", + "filter": [ "3_5_edgegrams" ] + } + }, + "filter": { + "3_5_edgegrams": { + "type": "edge_ngram", + "min_gram": 3, + "max_gram": 5 + } + } + } + } +} +``` + + +## Limitations of the `max_gram` parameter [analysis-edgengram-tokenfilter-max-gram-limits] + +The `edge_ngram` filter’s `max_gram` value limits the character length of tokens. When the `edge_ngram` filter is used with an index analyzer, this means search terms longer than the `max_gram` length may not match any indexed terms. + +For example, if the `max_gram` is `3`, searches for `apple` won’t match the indexed term `app`. + +To account for this, you can use the [`truncate`](/reference/data-analysis/text-analysis/analysis-truncate-tokenfilter.md) filter with a search analyzer to shorten search terms to the `max_gram` character length. However, this could return irrelevant results. + +For example, if the `max_gram` is `3` and search terms are truncated to three characters, the search term `apple` is shortened to `app`. This means searches for `apple` return any indexed terms matching `app`, such as `apply`, `snapped`, and `apple`. + +We recommend testing both approaches to see which best fits your use case and desired search experience. + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-edgengram-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-edgengram-tokenizer.md new file mode 100644 index 0000000000000..82ccf9850b039 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-edgengram-tokenizer.md @@ -0,0 +1,197 @@ +--- +navigation_title: "Edge n-gram" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-edgengram-tokenizer.html +--- + +# Edge n-gram tokenizer [analysis-edgengram-tokenizer] + + +The `edge_ngram` tokenizer first breaks text down into words whenever it encounters one of a list of specified characters, then it emits [N-grams](https://en.wikipedia.org/wiki/N-gram) of each word where the start of the N-gram is anchored to the beginning of the word. + +Edge N-Grams are useful for *search-as-you-type* queries. + +::::{tip} +When you need *search-as-you-type* for text which has a widely known order, such as movie or song titles, the completion suggester is a much more efficient choice than edge N-grams. Edge N-grams have the advantage when trying to autocomplete words that can appear in any order. For more information about completion suggesters, refer to [](/reference/elasticsearch/rest-apis/search-suggesters.md). 
+:::: + + + +## Example output [_example_output_9] + +With the default settings, the `edge_ngram` tokenizer treats the initial text as a single token and produces N-grams with minimum length `1` and maximum length `2`: + +```console +POST _analyze +{ + "tokenizer": "edge_ngram", + "text": "Quick Fox" +} +``` + +The above sentence would produce the following terms: + +```text +[ Q, Qu ] +``` + +::::{note} +These default gram lengths are almost entirely useless. You need to configure the `edge_ngram` before using it. +:::: + + + +## Configuration [_configuration_10] + +The `edge_ngram` tokenizer accepts the following parameters: + +`min_gram` +: Minimum length of characters in a gram. Defaults to `1`. + +`max_gram` +: Maximum length of characters in a gram. Defaults to `2`. + +See [Limitations of the `max_gram` parameter](#max-gram-limits). + + +`token_chars` +: Character classes that should be included in a token. Elasticsearch will split on characters that don’t belong to the classes specified. Defaults to `[]` (keep all characters). + + Character classes may be any of the following: + + * `letter` —  for example `a`, `b`, `ï` or `京` + * `digit` —  for example `3` or `7` + * `whitespace` —  for example `" "` or `"\n"` + * `punctuation` — for example `!` or `"` + * `symbol` —  for example `$` or `√` + * `custom` —  custom characters which need to be set using the `custom_token_chars` setting. + + +`custom_token_chars` +: Custom characters that should be treated as part of a token. For example, setting this to `+-_` will make the tokenizer treat the plus, minus and underscore sign as part of a token. + + +## Limitations of the `max_gram` parameter [max-gram-limits] + +The `edge_ngram` tokenizer’s `max_gram` value limits the character length of tokens. When the `edge_ngram` tokenizer is used with an index analyzer, this means search terms longer than the `max_gram` length may not match any indexed terms. + +For example, if the `max_gram` is `3`, searches for `apple` won’t match the indexed term `app`. + +To account for this, you can use the [`truncate`](/reference/data-analysis/text-analysis/analysis-truncate-tokenfilter.md) token filter with a search analyzer to shorten search terms to the `max_gram` character length. However, this could return irrelevant results. + +For example, if the `max_gram` is `3` and search terms are truncated to three characters, the search term `apple` is shortened to `app`. This means searches for `apple` return any indexed terms matching `app`, such as `apply`, `approximate` and `apple`. + +We recommend testing both approaches to see which best fits your use case and desired search experience. + + +## Example configuration [_example_configuration_7] + +In this example, we configure the `edge_ngram` tokenizer to treat letters and digits as tokens, and to produce grams with minimum length `2` and maximum length `10`: + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "my_tokenizer" + } + }, + "tokenizer": { + "my_tokenizer": { + "type": "edge_ngram", + "min_gram": 2, + "max_gram": 10, + "token_chars": [ + "letter", + "digit" + ] + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_analyzer", + "text": "2 Quick Foxes." +} +``` + +The above example produces the following terms: + +```text +[ Qu, Qui, Quic, Quick, Fo, Fox, Foxe, Foxes ] +``` + +Usually we recommend using the same `analyzer` at index time and at search time. 
In the case of the `edge_ngram` tokenizer, the advice is different. It only makes sense to use the `edge_ngram` tokenizer at index time, to ensure that partial words are available for matching in the index. At search time, just search for the terms the user has typed in, for instance: `Quick Fo`. + +Below is an example of how to set up a field for *search-as-you-type*. + +Note that the `max_gram` value for the index analyzer is `10`, which limits indexed terms to 10 characters. Search terms are not truncated, meaning that search terms longer than 10 characters may not match any indexed terms. + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "autocomplete": { + "tokenizer": "autocomplete", + "filter": [ + "lowercase" + ] + }, + "autocomplete_search": { + "tokenizer": "lowercase" + } + }, + "tokenizer": { + "autocomplete": { + "type": "edge_ngram", + "min_gram": 2, + "max_gram": 10, + "token_chars": [ + "letter" + ] + } + } + } + }, + "mappings": { + "properties": { + "title": { + "type": "text", + "analyzer": "autocomplete", + "search_analyzer": "autocomplete_search" + } + } + } +} + +PUT my-index-000001/_doc/1 +{ + "title": "Quick Foxes" <1> +} + +POST my-index-000001/_refresh + +GET my-index-000001/_search +{ + "query": { + "match": { + "title": { + "query": "Quick Fo", <2> + "operator": "and" + } + } + } +} +``` + +1. The `autocomplete` analyzer indexes the terms `[qu, qui, quic, quick, fo, fox, foxe, foxes]`. +2. The `autocomplete_search` analyzer searches for the terms `[quick, fo]`, both of which appear in the index. + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-elision-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-elision-tokenfilter.md new file mode 100644 index 0000000000000..79eb95c5bbbbf --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-elision-tokenfilter.md @@ -0,0 +1,121 @@ +--- +navigation_title: "Elision" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-elision-tokenfilter.html +--- + +# Elision token filter [analysis-elision-tokenfilter] + + +Removes specified [elisions](https://en.wikipedia.org/wiki/Elision) from the beginning of tokens. For example, you can use this filter to change `l'avion` to `avion`. + +When not customized, the filter removes the following French elisions by default: + +`l'`, `m'`, `t'`, `qu'`, `n'`, `s'`, `j'`, `d'`, `c'`, `jusqu'`, `quoiqu'`, `lorsqu'`, `puisqu'` + +Customized versions of this filter are included in several of {{es}}'s built-in [language analyzers](/reference/data-analysis/text-analysis/analysis-lang-analyzer.md): + +* [Catalan analyzer](/reference/data-analysis/text-analysis/analysis-lang-analyzer.md#catalan-analyzer) +* [French analyzer](/reference/data-analysis/text-analysis/analysis-lang-analyzer.md#french-analyzer) +* [Irish analyzer](/reference/data-analysis/text-analysis/analysis-lang-analyzer.md#irish-analyzer) +* [Italian analyzer](/reference/data-analysis/text-analysis/analysis-lang-analyzer.md#italian-analyzer) + +This filter uses Lucene’s [ElisionFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/util/ElisionFilter.md). 
+ +## Example [analysis-elision-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `elision` filter to remove `j'` from `j’examine près du wharf`: + +```console +GET _analyze +{ + "tokenizer" : "standard", + "filter" : ["elision"], + "text" : "j’examine près du wharf" +} +``` + +The filter produces the following tokens: + +```text +[ examine, près, du, wharf ] +``` + + +## Add to an analyzer [analysis-elision-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `elision` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT /elision_example +{ + "settings": { + "analysis": { + "analyzer": { + "whitespace_elision": { + "tokenizer": "whitespace", + "filter": [ "elision" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-elision-tokenfilter-configure-parms] + +$$$analysis-elision-tokenfilter-articles$$$ + +`articles` +: (Required*, array of string) List of elisions to remove. + +To be removed, the elision must be at the beginning of a token and be immediately followed by an apostrophe. Both the elision and apostrophe are removed. + +For custom `elision` filters, either this parameter or `articles_path` must be specified. + + +`articles_path` +: (Required*, string) Path to a file that contains a list of elisions to remove. + +This path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each elision in the file must be separated by a line break. + +To be removed, the elision must be at the beginning of a token and be immediately followed by an apostrophe. Both the elision and apostrophe are removed. + +For custom `elision` filters, either this parameter or `articles` must be specified. + + +`articles_case` +: (Optional, Boolean) If `true`, elision matching is case insensitive. If `false`, elision matching is case sensitive. Defaults to `false`. + + +## Customize [analysis-elision-tokenfilter-customize] + +To customize the `elision` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. 
+ +For example, the following request creates a custom case-insensitive `elision` filter that removes the `l'`, `m'`, `t'`, `qu'`, `n'`, `s'`, and `j'` elisions: + +```console +PUT /elision_case_insensitive_example +{ + "settings": { + "analysis": { + "analyzer": { + "default": { + "tokenizer": "whitespace", + "filter": [ "elision_case_insensitive" ] + } + }, + "filter": { + "elision_case_insensitive": { + "type": "elision", + "articles": [ "l", "m", "t", "qu", "n", "s", "j" ], + "articles_case": true + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-fingerprint-analyzer.md b/docs/reference/data-analysis/text-analysis/analysis-fingerprint-analyzer.md new file mode 100644 index 0000000000000..d00f108c93f82 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-fingerprint-analyzer.md @@ -0,0 +1,120 @@ +--- +navigation_title: "Fingerprint" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-fingerprint-analyzer.html +--- + +# Fingerprint analyzer [analysis-fingerprint-analyzer] + + +The `fingerprint` analyzer implements a [fingerprinting algorithm](https://github.com/OpenRefine/OpenRefine/wiki/Clustering-In-Depth#fingerprint) which is used by the OpenRefine project to assist in clustering. + +Input text is lowercased, normalized to remove extended characters, sorted, deduplicated and concatenated into a single token. If a stopword list is configured, stop words will also be removed. + + +## Example output [_example_output] + +```console +POST _analyze +{ + "analyzer": "fingerprint", + "text": "Yes yes, Gödel said this sentence is consistent and." +} +``` + +The above sentence would produce the following single term: + +```text +[ and consistent godel is said sentence this yes ] +``` + + +## Configuration [_configuration_2] + +The `fingerprint` analyzer accepts the following parameters: + +`separator` +: The character to use to concatenate the terms. Defaults to a space. + +`max_output_size` +: The maximum token size to emit. Defaults to `255`. Tokens larger than this size will be discarded. + +`stopwords` +: A pre-defined stop words list like `_english_` or an array containing a list of stop words. Defaults to `_none_`. + +`stopwords_path` +: The path to a file containing stop words. + +See the [Stop Token Filter](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) for more information about stop word configuration. + + +## Example configuration [_example_configuration_2] + +In this example, we configure the `fingerprint` analyzer to use the pre-defined list of English stop words: + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_fingerprint_analyzer": { + "type": "fingerprint", + "stopwords": "_english_" + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_fingerprint_analyzer", + "text": "Yes yes, Gödel said this sentence is consistent and." 
+} +``` + +The above example produces the following term: + +```text +[ consistent godel said sentence yes ] +``` + + +## Definition [_definition] + +The `fingerprint` tokenizer consists of: + +Tokenizer +: * [Standard Tokenizer](/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md) + + +Token Filters (in order) +: * [Lower Case Token Filter](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md) +* [ASCII folding](/reference/data-analysis/text-analysis/analysis-asciifolding-tokenfilter.md) +* [Stop Token Filter](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) (disabled by default) +* [Fingerprint](/reference/data-analysis/text-analysis/analysis-fingerprint-tokenfilter.md) + + +If you need to customize the `fingerprint` analyzer beyond the configuration parameters then you need to recreate it as a `custom` analyzer and modify it, usually by adding token filters. This would recreate the built-in `fingerprint` analyzer and you can use it as a starting point for further customization: + +```console +PUT /fingerprint_example +{ + "settings": { + "analysis": { + "analyzer": { + "rebuilt_fingerprint": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "asciifolding", + "fingerprint" + ] + } + } + } + } +} +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-fingerprint-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-fingerprint-tokenfilter.md new file mode 100644 index 0000000000000..2689a1c099b60 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-fingerprint-tokenfilter.md @@ -0,0 +1,103 @@ +--- +navigation_title: "Fingerprint" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-fingerprint-tokenfilter.html +--- + +# Fingerprint token filter [analysis-fingerprint-tokenfilter] + + +Sorts and removes duplicate tokens from a token stream, then concatenates the stream into a single output token. + +For example, this filter changes the `[ the, fox, was, very, very, quick ]` token stream as follows: + +1. Sorts the tokens alphabetically to `[ fox, quick, the, very, very, was ]` +2. Removes a duplicate instance of the `very` token. +3. Concatenates the token stream to a output single token: `[fox quick the very was ]` + +Output tokens produced by this filter are useful for fingerprinting and clustering a body of text as described in the [OpenRefine project](https://github.com/OpenRefine/OpenRefine/wiki/Clustering-In-Depth#fingerprint). + +This filter uses Lucene’s [FingerprintFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.md). 
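As a quick sketch of the three steps above, an [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request along these lines should reproduce the single concatenated token, assuming the `whitespace` tokenizer so the input splits into exactly the tokens listed (the next section walks through a fuller example with different sample text):

```console
GET _analyze
{
  "tokenizer": "whitespace",
  "filter": [ "fingerprint" ],
  "text": "the fox was very very quick"
}
```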
+ +## Example [analysis-fingerprint-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `fingerprint` filter to create a single output token for the text `zebra jumps over resting resting dog`: + +```console +GET _analyze +{ + "tokenizer" : "whitespace", + "filter" : ["fingerprint"], + "text" : "zebra jumps over resting resting dog" +} +``` + +The filter produces the following token: + +```text +[ dog jumps over resting zebra ] +``` + + +## Add to an analyzer [analysis-fingerprint-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `fingerprint` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT fingerprint_example +{ + "settings": { + "analysis": { + "analyzer": { + "whitespace_fingerprint": { + "tokenizer": "whitespace", + "filter": [ "fingerprint" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-fingerprint-tokenfilter-configure-parms] + +$$$analysis-fingerprint-tokenfilter-max-size$$$ + +`max_output_size` +: (Optional, integer) Maximum character length, including whitespace, of the output token. Defaults to `255`. Concatenated tokens longer than this will result in no token output. + +`separator` +: (Optional, string) Character to use to concatenate the token stream input. Defaults to a space. + + +## Customize [analysis-fingerprint-tokenfilter-customize] + +To customize the `fingerprint` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following request creates a custom `fingerprint` filter with that use `+` to concatenate token streams. The filter also limits output tokens to `100` characters or fewer. + +```console +PUT custom_fingerprint_example +{ + "settings": { + "analysis": { + "analyzer": { + "whitespace_": { + "tokenizer": "whitespace", + "filter": [ "fingerprint_plus_concat" ] + } + }, + "filter": { + "fingerprint_plus_concat": { + "type": "fingerprint", + "max_output_size": 100, + "separator": "+" + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-flatten-graph-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-flatten-graph-tokenfilter.md new file mode 100644 index 0000000000000..a6f665f6b66fc --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-flatten-graph-tokenfilter.md @@ -0,0 +1,108 @@ +--- +navigation_title: "Flatten graph" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-flatten-graph-tokenfilter.html +--- + +# Flatten graph token filter [analysis-flatten-graph-tokenfilter] + + +Flattens a [token graph](docs-content://manage-data/data-store/text-analysis/token-graphs.md) produced by a graph token filter, such as [`synonym_graph`](/reference/data-analysis/text-analysis/analysis-synonym-graph-tokenfilter.md) or [`word_delimiter_graph`](/reference/data-analysis/text-analysis/analysis-word-delimiter-graph-tokenfilter.md). + +Flattening a token graph containing [multi-position tokens](docs-content://manage-data/data-store/text-analysis/token-graphs.md#token-graphs-multi-position-tokens) makes the graph suitable for [indexing](docs-content://manage-data/data-store/text-analysis/index-search-analysis.md). 
Otherwise, indexing does not support token graphs containing multi-position tokens. + +::::{warning} +Flattening graphs is a lossy process. + +If possible, avoid using the `flatten_graph` filter. Instead, use graph token filters in [search analyzers](docs-content://manage-data/data-store/text-analysis/index-search-analysis.md) only. This eliminates the need for the `flatten_graph` filter. + +:::: + + +The `flatten_graph` filter uses Lucene’s [FlattenGraphFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/core/FlattenGraphFilter.md). + +## Example [analysis-flatten-graph-tokenfilter-analyze-ex] + +To see how the `flatten_graph` filter works, you first need to produce a token graph containing multi-position tokens. + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `synonym_graph` filter to add `dns` as a multi-position synonym for `domain name system` in the text `domain name system is fragile`: + +```console +GET /_analyze +{ + "tokenizer": "standard", + "filter": [ + { + "type": "synonym_graph", + "synonyms": [ "dns, domain name system" ] + } + ], + "text": "domain name system is fragile" +} +``` + +The filter produces the following token graph with `dns` as a multi-position token. + +:::{image} ../../../images/token-graph-dns-synonym-ex.svg +:alt: token graph dns synonym ex +::: + +Indexing does not support token graphs containing multi-position tokens. To make this token graph suitable for indexing, it needs to be flattened. + +To flatten the token graph, add the `flatten_graph` filter after the `synonym_graph` filter in the previous analyze API request. + +```console +GET /_analyze +{ + "tokenizer": "standard", + "filter": [ + { + "type": "synonym_graph", + "synonyms": [ "dns, domain name system" ] + }, + "flatten_graph" + ], + "text": "domain name system is fragile" +} +``` + +The filter produces the following flattened token graph, which is suitable for indexing. + +:::{image} ../../../images/token-graph-dns-invalid-ex.svg +:alt: token graph dns invalid ex +::: + + +## Add to an analyzer [analysis-keyword-marker-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `flatten_graph` token filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +In this analyzer, a custom `word_delimiter_graph` filter produces token graphs containing catenated, multi-position tokens. The `flatten_graph` filter flattens these token graphs, making them suitable for indexing. 
+ +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_custom_index_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "my_custom_word_delimiter_graph_filter", + "flatten_graph" + ] + } + }, + "filter": { + "my_custom_word_delimiter_graph_filter": { + "type": "word_delimiter_graph", + "catenate_all": true + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-htmlstrip-charfilter.md b/docs/reference/data-analysis/text-analysis/analysis-htmlstrip-charfilter.md new file mode 100644 index 0000000000000..70bd04244de47 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-htmlstrip-charfilter.md @@ -0,0 +1,99 @@ +--- +navigation_title: "HTML strip" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-htmlstrip-charfilter.html +--- + +# HTML strip character filter [analysis-htmlstrip-charfilter] + + +Strips HTML elements from a text and replaces HTML entities with their decoded value (e.g, replaces `&` with `&`). + +The `html_strip` filter uses Lucene’s [HTMLStripCharFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.md). + +## Example [analysis-htmlstrip-charfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `html_strip` filter to change the text `
<p>I&apos;m so <b>happy</b>!</p>
` to `\nI'm so happy!\n`. + +```console +GET /_analyze +{ + "tokenizer": "keyword", + "char_filter": [ + "html_strip" + ], + "text": "
<p>I&apos;m so <b>happy</b>!</p>
" +} +``` + +The filter produces the following text: + +```text +[ \nI'm so happy!\n ] +``` + + +## Add to an analyzer [analysis-htmlstrip-charfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `html_strip` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "keyword", + "char_filter": [ + "html_strip" + ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-htmlstrip-charfilter-configure-parms] + +`escaped_tags` +: (Optional, array of strings) Array of HTML elements without enclosing angle brackets (`< >`). The filter skips these HTML elements when stripping HTML from the text. For example, a value of `[ "p" ]` skips the `
<p>
` HTML element. + + +## Customize [analysis-htmlstrip-charfilter-customize] + +To customize the `html_strip` filter, duplicate it to create the basis for a new custom character filter. You can modify the filter using its configurable parameters. + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request configures a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md) using a custom `html_strip` filter, `my_custom_html_strip_char_filter`. + +The `my_custom_html_strip_char_filter` filter skips the removal of the `` HTML element. + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "keyword", + "char_filter": [ + "my_custom_html_strip_char_filter" + ] + } + }, + "char_filter": { + "my_custom_html_strip_char_filter": { + "type": "html_strip", + "escaped_tags": [ + "b" + ] + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-hunspell-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-hunspell-tokenfilter.md new file mode 100644 index 0000000000000..e01ff9d055223 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-hunspell-tokenfilter.md @@ -0,0 +1,150 @@ +--- +navigation_title: "Hunspell" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-hunspell-tokenfilter.html +--- + +# Hunspell token filter [analysis-hunspell-tokenfilter] + + +Provides [dictionary stemming](docs-content://manage-data/data-store/text-analysis/stemming.md#dictionary-stemmers) based on a provided [Hunspell dictionary](https://en.wikipedia.org/wiki/Hunspell). The `hunspell` filter requires [configuration](#analysis-hunspell-tokenfilter-dictionary-config) of one or more language-specific Hunspell dictionaries. + +This filter uses Lucene’s [HunspellStemFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/hunspell/HunspellStemFilter.md). + +::::{tip} +If available, we recommend trying an algorithmic stemmer for your language before using the `hunspell` token filter. In practice, algorithmic stemmers typically outperform dictionary stemmers. See [Dictionary stemmers](docs-content://manage-data/data-store/text-analysis/stemming.md#dictionary-stemmers). + +:::: + + +## Configure Hunspell dictionaries [analysis-hunspell-tokenfilter-dictionary-config] + +Hunspell dictionaries are stored and detected on a dedicated `hunspell` directory on the filesystem: `<$ES_PATH_CONF>/hunspell`. Each dictionary is expected to have its own directory, named after its associated language and locale (e.g., `pt_BR`, `en_GB`). This dictionary directory is expected to hold a single `.aff` and one or more `.dic` files, all of which will automatically be picked up. For example, the following directory layout will define the `en_US` dictionary: + +```txt +- config + |-- hunspell + | |-- en_US + | | |-- en_US.dic + | | |-- en_US.aff +``` + +Each dictionary can be configured with one setting: + +$$$analysis-hunspell-ignore-case-settings$$$ + +`ignore_case` +: (Static, Boolean) If true, dictionary matching will be case insensitive. Defaults to `false`. + + This setting can be configured globally in `elasticsearch.yml` using `indices.analysis.hunspell.dictionary.ignore_case`. 
+ + To configure the setting for a specific locale, use the `indices.analysis.hunspell.dictionary..ignore_case` setting (e.g., for the `en_US` (American English) locale, the setting is `indices.analysis.hunspell.dictionary.en_US.ignore_case`). + + You can also add a `settings.yml` file under the dictionary directory which holds these settings. This overrides any other `ignore_case` settings defined in `elasticsearch.yml`. + + + +## Example [analysis-hunspell-tokenfilter-analyze-ex] + +The following analyze API request uses the `hunspell` filter to stem `the foxes jumping quickly` to `the fox jump quick`. + +The request specifies the `en_US` locale, meaning that the `.aff` and `.dic` files in the `<$ES_PATH_CONF>/hunspell/en_US` directory are used for the Hunspell dictionary. + +```console +GET /_analyze +{ + "tokenizer": "standard", + "filter": [ + { + "type": "hunspell", + "locale": "en_US" + } + ], + "text": "the foxes jumping quickly" +} +``` + +The filter produces the following tokens: + +```text +[ the, fox, jump, quick ] +``` + + +## Configurable parameters [analysis-hunspell-tokenfilter-configure-parms] + +$$$analysis-hunspell-tokenfilter-dictionary-param$$$ + +`dictionary` +: (Optional, string or array of strings) One or more `.dic` files (e.g, `en_US.dic, my_custom.dic`) to use for the Hunspell dictionary. + + By default, the `hunspell` filter uses all `.dic` files in the `<$ES_PATH_CONF>/hunspell/` directory specified using the `lang`, `language`, or `locale` parameter. + + +`dedup` +: (Optional, Boolean) If `true`, duplicate tokens are removed from the filter’s output. Defaults to `true`. + +`lang` +: (Required*, string) An alias for the [`locale` parameter](#analysis-hunspell-tokenfilter-locale-param). + + If this parameter is not specified, the `language` or `locale` parameter is required. + + +`language` +: (Required*, string) An alias for the [`locale` parameter](#analysis-hunspell-tokenfilter-locale-param). + + If this parameter is not specified, the `lang` or `locale` parameter is required. + + +$$$analysis-hunspell-tokenfilter-locale-param$$$ + +`locale` +: (Required*, string) Locale directory used to specify the `.aff` and `.dic` files for a Hunspell dictionary. See [Configure Hunspell dictionaries](#analysis-hunspell-tokenfilter-dictionary-config). + + If this parameter is not specified, the `lang` or `language` parameter is required. + + +`longest_only` +: (Optional, Boolean) If `true`, only the longest stemmed version of each token is included in the output. If `false`, all stemmed versions of the token are included. Defaults to `false`. + + +## Customize and add to an analyzer [analysis-hunspell-tokenfilter-analyzer-ex] + +To customize the `hunspell` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses a custom `hunspell` filter, `my_en_US_dict_stemmer`, to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +The `my_en_US_dict_stemmer` filter uses a `locale` of `en_US`, meaning that the `.aff` and `.dic` files in the `<$ES_PATH_CONF>/hunspell/en_US` directory are used. The filter also includes a `dedup` argument of `false`, meaning that duplicate tokens added from the dictionary are not removed from the filter’s output. 
+ +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "en": { + "tokenizer": "standard", + "filter": [ "my_en_US_dict_stemmer" ] + } + }, + "filter": { + "my_en_US_dict_stemmer": { + "type": "hunspell", + "locale": "en_US", + "dedup": false + } + } + } + } +} +``` + + +## Settings [analysis-hunspell-tokenfilter-settings] + +In addition to the [`ignore_case` settings](#analysis-hunspell-ignore-case-settings), you can configure the following global settings for the `hunspell` filter using `elasticsearch.yml`: + +`indices.analysis.hunspell.dictionary.lazy` +: (Static, Boolean) If `true`, the loading of Hunspell dictionaries is deferred until a dictionary is used. If `false`, the dictionary directory is checked for dictionaries when the node starts, and any dictionaries are automatically loaded. Defaults to `false`. + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-hyp-decomp-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-hyp-decomp-tokenfilter.md new file mode 100644 index 0000000000000..cb2ae226f6eb9 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-hyp-decomp-tokenfilter.md @@ -0,0 +1,124 @@ +--- +navigation_title: "Hyphenation decompounder" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-hyp-decomp-tokenfilter.html +--- + +# Hyphenation decompounder token filter [analysis-hyp-decomp-tokenfilter] + + +Uses XML-based hyphenation patterns to find potential subwords in compound words. These subwords are then checked against the specified word list. Subwords not in the list are excluded from the token output. + +This filter uses Lucene’s [HyphenationCompoundWordTokenFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.md), which was built for Germanic languages. + +## Example [analysis-hyp-decomp-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `hyphenation_decompounder` filter to find subwords in `Kaffeetasse` based on German hyphenation patterns in the `analysis/hyphenation_patterns.xml` file. The filter then checks these subwords against a list of specified words: `kaffee`, `zucker`, and `tasse`. + +```console +GET _analyze +{ + "tokenizer": "standard", + "filter": [ + { + "type": "hyphenation_decompounder", + "hyphenation_patterns_path": "analysis/hyphenation_patterns.xml", + "word_list": ["Kaffee", "zucker", "tasse"] + } + ], + "text": "Kaffeetasse" +} +``` + +The filter produces the following tokens: + +```text +[ Kaffeetasse, Kaffee, tasse ] +``` + + +## Configurable parameters [analysis-hyp-decomp-tokenfilter-configure-parms] + +`hyphenation_patterns_path` +: (Required, string) Path to an Apache FOP (Formatting Objects Processor) XML hyphenation pattern file. + +This path must be absolute or relative to the `config` location. Only FOP v1.2 compatible files are supported. + +For example FOP XML hyphenation pattern files, refer to: + +* [Objects For Formatting Objects (OFFO) Sourceforge project](http://offo.sourceforge.net/#FOP+XML+Hyphenation+Patterns) +* [offo-hyphenation_v1.2.zip direct download](https://sourceforge.net/projects/offo/files/offo-hyphenation/1.2/offo-hyphenation_v1.2.zip/download) (v2.0 and above hyphenation pattern files are not supported) + + +`word_list` +: (Required*, array of strings) A list of subwords. 
Subwords found using the hyphenation pattern but not in this list are excluded from the token output. + +You can use the [`dictionary_decompounder`](/reference/data-analysis/text-analysis/analysis-dict-decomp-tokenfilter.md) filter to test the quality of word lists before implementing them. + +Either this parameter or `word_list_path` must be specified. + + +`word_list_path` +: (Required*, string) Path to a file containing a list of subwords. Subwords found using the hyphenation pattern but not in this list are excluded from the token output. + +This path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each token in the file must be separated by a line break. + +You can use the [`dictionary_decompounder`](/reference/data-analysis/text-analysis/analysis-dict-decomp-tokenfilter.md) filter to test the quality of word lists before implementing them. + +Either this parameter or `word_list` must be specified. + + +`max_subword_size` +: (Optional, integer) Maximum subword character length. Longer subword tokens are excluded from the output. Defaults to `15`. + +`min_subword_size` +: (Optional, integer) Minimum subword character length. Shorter subword tokens are excluded from the output. Defaults to `2`. + +`min_word_size` +: (Optional, integer) Minimum word character length. Shorter word tokens are excluded from the output. Defaults to `5`. + +`only_longest_match` +: (Optional, Boolean) If `true`, only include the longest matching subword. Defaults to `false`. + +`no_sub_matches` +: (Optional, Boolean) If `true`, do not match sub tokens in tokens that are in the word list. Defaults to `false`. + +`no_overlapping_matches` +: (Optional, Boolean) If `true`, do not allow overlapping tokens. Defaults to `false`. + +Typically users will only want to include one of the three flags as enabling `no_overlapping_matches` is the most restrictive and `no_sub_matches` is more restrictive than `only_longest_match`. When enabling a more restrictive option the state of the less restrictive does not have any effect. + + +## Customize and add to an analyzer [analysis-hyp-decomp-tokenfilter-customize] + +To customize the `hyphenation_decompounder` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses a custom `hyphenation_decompounder` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +The custom `hyphenation_decompounder` filter find subwords based on hyphenation patterns in the `analysis/hyphenation_patterns.xml` file. The filter then checks these subwords against the list of words specified in the `analysis/example_word_list.txt` file. Subwords longer than 22 characters are excluded from the token output. 
+
+```console
+PUT hyphenation_decompound_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "standard_hyphenation_decompound": {
+          "tokenizer": "standard",
+          "filter": [ "22_char_hyphenation_decompound" ]
+        }
+      },
+      "filter": {
+        "22_char_hyphenation_decompound": {
+          "type": "hyphenation_decompounder",
+          "word_list_path": "analysis/example_word_list.txt",
+          "hyphenation_patterns_path": "analysis/hyphenation_patterns.xml",
+          "max_subword_size": 22
+        }
+      }
+    }
+  }
+}
+```
+
+
diff --git a/docs/reference/data-analysis/text-analysis/analysis-keep-types-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-keep-types-tokenfilter.md
new file mode 100644
index 0000000000000..ce22b7fbdc971
--- /dev/null
+++ b/docs/reference/data-analysis/text-analysis/analysis-keep-types-tokenfilter.md
@@ -0,0 +1,123 @@
+---
+navigation_title: "Keep types"
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-keep-types-tokenfilter.html
+---
+
+# Keep types token filter [analysis-keep-types-tokenfilter]
+
+
+Keeps or removes tokens of a specific type. For example, you can use this filter to change `3 quick foxes` to `quick foxes` by keeping only `<ALPHANUM>` (alphanumeric) tokens.
+
+::::{admonition} Token types
+:class: note
+
+Token types are set by the [tokenizer](/reference/data-analysis/text-analysis/tokenizer-reference.md) when converting characters to tokens. Token types can vary between tokenizers.
+
+For example, the [`standard`](/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md) tokenizer can produce a variety of token types, including `<ALPHANUM>`, `<HANGUL>`, and `<NUM>`. Simpler analyzers, like the [`lowercase`](/reference/data-analysis/text-analysis/analysis-lowercase-tokenizer.md) tokenizer, only produce the `word` token type.
+
+Certain token filters can also add token types. For example, the [`synonym`](/reference/data-analysis/text-analysis/analysis-synonym-tokenfilter.md) filter can add the `<SYNONYM>` token type.
+
+Some tokenizers don’t support this token filter, for example the `keyword`, `simple_pattern`, and `simple_pattern_split` tokenizers, as they don’t support setting the token type attribute.
+
+::::
+
+
+This filter uses Lucene’s [TypeTokenFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/core/TypeTokenFilter.md).
+
+## Include example [analysis-keep-types-tokenfilter-analyze-include-ex]
+
+The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `keep_types` filter to keep only `<NUM>` (numeric) tokens from `1 quick fox 2 lazy dogs`.
+
+```console
+GET _analyze
+{
+  "tokenizer": "standard",
+  "filter": [
+    {
+      "type": "keep_types",
+      "types": [ "<NUM>" ]
+    }
+  ],
+  "text": "1 quick fox 2 lazy dogs"
+}
+```
+
+The filter produces the following tokens:
+
+```text
+[ 1, 2 ]
+```
+
+
+## Exclude example [analysis-keep-types-tokenfilter-analyze-exclude-ex]
+
+The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `keep_types` filter to remove `<NUM>` tokens from `1 quick fox 2 lazy dogs`. Note the `mode` parameter is set to `exclude`.
+
+```console
+GET _analyze
+{
+  "tokenizer": "standard",
+  "filter": [
+    {
+      "type": "keep_types",
+      "types": [ "<NUM>" ],
+      "mode": "exclude"
+    }
+  ],
+  "text": "1 quick fox 2 lazy dogs"
+}
+```
+
+The filter produces the following tokens:
+
+```text
+[ quick, fox, lazy, dogs ]
+```
+
+
+## Configurable parameters [analysis-keep-types-tokenfilter-configure-parms]
+
+`types`
+: (Required, array of strings) List of token types to keep or remove.
+
+`mode`
+: (Optional, string) Indicates whether to keep or remove the specified token types. Valid values are:
+
+    `include`
+    : (Default) Keep only the specified token types.
+
+    `exclude`
+    : Remove the specified token types.
+
+
+
+## Customize and add to an analyzer [analysis-keep-types-tokenfilter-customize]
+
+To customize the `keep_types` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters.
+
+For example, the following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses a custom `keep_types` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). The custom `keep_types` filter keeps only `<ALPHANUM>` (alphanumeric) tokens.
+
+```console
+PUT keep_types_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "my_analyzer": {
+          "tokenizer": "standard",
+          "filter": [ "extract_alpha" ]
+        }
+      },
+      "filter": {
+        "extract_alpha": {
+          "type": "keep_types",
+          "types": [ "<ALPHANUM>" ]
+        }
+      }
+    }
+  }
+}
+```
+
+
diff --git a/docs/reference/data-analysis/text-analysis/analysis-keep-words-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-keep-words-tokenfilter.md
new file mode 100644
index 0000000000000..999bccbabee5f
--- /dev/null
+++ b/docs/reference/data-analysis/text-analysis/analysis-keep-words-tokenfilter.md
@@ -0,0 +1,104 @@
+---
+navigation_title: "Keep words"
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-keep-words-tokenfilter.html
+---
+
+# Keep words token filter [analysis-keep-words-tokenfilter]
+
+
+Keeps only tokens contained in a specified word list.
+
+This filter uses Lucene’s [KeepWordFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.md).
+
+::::{note}
+To remove a list of words from a token stream, use the [`stop`](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) filter.
+
+::::
+
+
+## Example [analysis-keep-words-tokenfilter-analyze-ex]
+
+The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `keep` filter to keep only the `fox` and `dog` tokens from `the quick fox jumps over the lazy dog`.
+
+```console
+GET _analyze
+{
+  "tokenizer": "whitespace",
+  "filter": [
+    {
+      "type": "keep",
+      "keep_words": [ "dog", "elephant", "fox" ]
+    }
+  ],
+  "text": "the quick fox jumps over the lazy dog"
+}
+```
+
+The filter produces the following tokens:
+
+```text
+[ fox, dog ]
+```
+
+
+## Configurable parameters [analysis-keep-words-tokenfilter-configure-parms]
+
+`keep_words`
+: (Required*, array of strings) List of words to keep. Only tokens that match words in this list are included in the output.
+
+Either this parameter or `keep_words_path` must be specified.
+
+
+`keep_words_path`
+: (Required*, string) Path to a file that contains a list of words to keep.
Only tokens that match words in this list are included in the output.
+
+This path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each word in the file must be separated by a line break.
+
+Either this parameter or `keep_words` must be specified.
+
+
+`keep_words_case`
+: (Optional, Boolean) If `true`, lowercase all keep words. Defaults to `false`.
+
+
+## Customize and add to an analyzer [analysis-keep-words-tokenfilter-customize]
+
+To customize the `keep` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters.
+
+For example, the following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses custom `keep` filters to configure two new [custom analyzers](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md):
+
+* `standard_keep_word_array`, which uses a custom `keep` filter with an inline array of keep words
+* `standard_keep_word_file`, which uses a custom `keep` filter with a keep words file
+
+```console
+PUT keep_words_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "standard_keep_word_array": {
+          "tokenizer": "standard",
+          "filter": [ "keep_word_array" ]
+        },
+        "standard_keep_word_file": {
+          "tokenizer": "standard",
+          "filter": [ "keep_word_file" ]
+        }
+      },
+      "filter": {
+        "keep_word_array": {
+          "type": "keep",
+          "keep_words": [ "one", "two", "three" ]
+        },
+        "keep_word_file": {
+          "type": "keep",
+          "keep_words_path": "analysis/example_word_list.txt"
+        }
+      }
+    }
+  }
+}
+```
+
+
diff --git a/docs/reference/data-analysis/text-analysis/analysis-keyword-analyzer.md b/docs/reference/data-analysis/text-analysis/analysis-keyword-analyzer.md
new file mode 100644
index 0000000000000..0e674a9477282
--- /dev/null
+++ b/docs/reference/data-analysis/text-analysis/analysis-keyword-analyzer.md
@@ -0,0 +1,64 @@
+---
+navigation_title: "Keyword"
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-keyword-analyzer.html
+---
+
+# Keyword analyzer [analysis-keyword-analyzer]
+
+
+The `keyword` analyzer is a noop analyzer which returns the entire input string as a single token.
+
+
+## Example output [_example_output_2]
+
+```console
+POST _analyze
+{
+  "analyzer": "keyword",
+  "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
+}
+```
+
+The above sentence would produce the following single term:
+
+```text
+[ The 2 QUICK Brown-Foxes jumped over the lazy dog's bone. ]
+```
+
+
+## Configuration [_configuration_3]
+
+The `keyword` analyzer is not configurable.
+
+
+## Definition [_definition_2]
+
+The `keyword` analyzer consists of:
+
+Tokenizer
+: * [Keyword Tokenizer](/reference/data-analysis/text-analysis/analysis-keyword-tokenizer.md)
+
+
+If you need to customize the `keyword` analyzer then you need to recreate it as a `custom` analyzer and modify it, usually by adding token filters. Usually, you should prefer the [Keyword type](/reference/elasticsearch/mapping-reference/keyword.md) when you want strings that are not split into tokens, but just in case you need it, this would recreate the built-in `keyword` analyzer and you can use it as a starting point for further customization:
+
+```console
+PUT /keyword_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "rebuilt_keyword": {
+          "tokenizer": "keyword",
+          "filter": [         <1>
+          ]
+        }
+      }
+    }
+  }
+}
+```
+
+1. You’d add any token filters here.
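+
+For instance, a minimal sketch of such a customization (the index and analyzer names here are only placeholders) adds a [`lowercase`](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md) token filter so that the single emitted token is lowercased:
+
+```console
+PUT /keyword_lowercase_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "rebuilt_keyword_lowercase": {
+          "tokenizer": "keyword",
+          "filter": [ "lowercase" ]
+        }
+      }
+    }
+  }
+}
+```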
+ + diff --git a/docs/reference/data-analysis/text-analysis/analysis-keyword-marker-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-keyword-marker-tokenfilter.md new file mode 100644 index 0000000000000..462d0d82d58eb --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-keyword-marker-tokenfilter.md @@ -0,0 +1,275 @@ +--- +navigation_title: "Keyword marker" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-keyword-marker-tokenfilter.html +--- + +# Keyword marker token filter [analysis-keyword-marker-tokenfilter] + + +Marks specified tokens as keywords, which are not stemmed. + +The `keyword_marker` filter assigns specified tokens a `keyword` attribute of `true`. Stemmer token filters, such as [`stemmer`](/reference/data-analysis/text-analysis/analysis-stemmer-tokenfilter.md) or [`porter_stem`](/reference/data-analysis/text-analysis/analysis-porterstem-tokenfilter.md), skip tokens with a `keyword` attribute of `true`. + +::::{important} +To work properly, the `keyword_marker` filter must be listed before any stemmer token filters in the [analyzer configuration](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +:::: + + +The `keyword_marker` filter uses Lucene’s [KeywordMarkerFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.md). + +## Example [analysis-keyword-marker-tokenfilter-analyze-ex] + +To see how the `keyword_marker` filter works, you first need to produce a token stream containing stemmed tokens. + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the [`stemmer`](/reference/data-analysis/text-analysis/analysis-stemmer-tokenfilter.md) filter to create stemmed tokens for `fox running and jumping`. + +```console +GET /_analyze +{ + "tokenizer": "whitespace", + "filter": [ "stemmer" ], + "text": "fox running and jumping" +} +``` + +The request produces the following tokens. Note that `running` was stemmed to `run` and `jumping` was stemmed to `jump`. + +```text +[ fox, run, and, jump ] +``` + +To prevent `jumping` from being stemmed, add the `keyword_marker` filter before the `stemmer` filter in the previous analyze API request. Specify `jumping` in the `keywords` parameter of the `keyword_marker` filter. + +```console +GET /_analyze +{ + "tokenizer": "whitespace", + "filter": [ + { + "type": "keyword_marker", + "keywords": [ "jumping" ] + }, + "stemmer" + ], + "text": "fox running and jumping" +} +``` + +The request produces the following tokens. `running` is still stemmed to `run`, but `jumping` is not stemmed. + +```text +[ fox, run, and, jumping ] +``` + +To see the `keyword` attribute for these tokens, add the following arguments to the analyze API request: + +* `explain`: `true` +* `attributes`: `keyword` + +```console +GET /_analyze +{ + "tokenizer": "whitespace", + "filter": [ + { + "type": "keyword_marker", + "keywords": [ "jumping" ] + }, + "stemmer" + ], + "text": "fox running and jumping", + "explain": true, + "attributes": "keyword" +} +``` + +The API returns the following response. Note the `jumping` token has a `keyword` attribute of `true`. 
+
+```console-result
+{
+  "detail": {
+    "custom_analyzer": true,
+    "charfilters": [],
+    "tokenizer": {
+      "name": "whitespace",
+      "tokens": [
+        {
+          "token": "fox",
+          "start_offset": 0,
+          "end_offset": 3,
+          "type": "word",
+          "position": 0
+        },
+        {
+          "token": "running",
+          "start_offset": 4,
+          "end_offset": 11,
+          "type": "word",
+          "position": 1
+        },
+        {
+          "token": "and",
+          "start_offset": 12,
+          "end_offset": 15,
+          "type": "word",
+          "position": 2
+        },
+        {
+          "token": "jumping",
+          "start_offset": 16,
+          "end_offset": 23,
+          "type": "word",
+          "position": 3
+        }
+      ]
+    },
+    "tokenfilters": [
+      {
+        "name": "__anonymous__keyword_marker",
+        "tokens": [
+          {
+            "token": "fox",
+            "start_offset": 0,
+            "end_offset": 3,
+            "type": "word",
+            "position": 0,
+            "keyword": false
+          },
+          {
+            "token": "running",
+            "start_offset": 4,
+            "end_offset": 11,
+            "type": "word",
+            "position": 1,
+            "keyword": false
+          },
+          {
+            "token": "and",
+            "start_offset": 12,
+            "end_offset": 15,
+            "type": "word",
+            "position": 2,
+            "keyword": false
+          },
+          {
+            "token": "jumping",
+            "start_offset": 16,
+            "end_offset": 23,
+            "type": "word",
+            "position": 3,
+            "keyword": true
+          }
+        ]
+      },
+      {
+        "name": "stemmer",
+        "tokens": [
+          {
+            "token": "fox",
+            "start_offset": 0,
+            "end_offset": 3,
+            "type": "word",
+            "position": 0,
+            "keyword": false
+          },
+          {
+            "token": "run",
+            "start_offset": 4,
+            "end_offset": 11,
+            "type": "word",
+            "position": 1,
+            "keyword": false
+          },
+          {
+            "token": "and",
+            "start_offset": 12,
+            "end_offset": 15,
+            "type": "word",
+            "position": 2,
+            "keyword": false
+          },
+          {
+            "token": "jumping",
+            "start_offset": 16,
+            "end_offset": 23,
+            "type": "word",
+            "position": 3,
+            "keyword": true
+          }
+        ]
+      }
+    ]
+  }
+}
+```
+
+
+## Configurable parameters [analysis-keyword-marker-tokenfilter-configure-parms]
+
+`ignore_case`
+: (Optional, Boolean) If `true`, matching for the `keywords` and `keywords_path` parameters ignores letter case. Defaults to `false`.
+
+`keywords`
+: (Required*, array of strings) Array of keywords. Tokens that match these keywords are not stemmed.
+
+    This parameter, `keywords_path`, or `keywords_pattern` must be specified. You cannot specify this parameter and `keywords_pattern`.
+
+
+`keywords_path`
+: (Required*, string) Path to a file that contains a list of keywords. Tokens that match these keywords are not stemmed.
+
+This path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each word in the file must be separated by a line break.
+
+This parameter, `keywords`, or `keywords_pattern` must be specified. You cannot specify this parameter and `keywords_pattern`.
+
+
+`keywords_pattern`
+: (Required*, string) [Java regular expression](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md) used to match tokens. Tokens that match this expression are marked as keywords and not stemmed.
+
+This parameter, `keywords`, or `keywords_path` must be specified. You cannot specify this parameter and `keywords` or `keywords_path`.
+
+::::{warning}
+Poorly written regular expressions can cause {{es}} to run slowly or result in stack overflow errors, causing the running node to suddenly exit.
+
+::::
+
+
+
+## Customize and add to an analyzer [analysis-keyword-marker-tokenfilter-customize]
+
+To customize the `keyword_marker` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters.
+ +For example, the following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses a custom `keyword_marker` filter and the `porter_stem` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +The custom `keyword_marker` filter marks tokens specified in the `analysis/example_word_list.txt` file as keywords. The `porter_stem` filter does not stem these tokens. + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_custom_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "my_custom_keyword_marker_filter", + "porter_stem" + ] + } + }, + "filter": { + "my_custom_keyword_marker_filter": { + "type": "keyword_marker", + "keywords_path": "analysis/example_word_list.txt" + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-keyword-repeat-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-keyword-repeat-tokenfilter.md new file mode 100644 index 0000000000000..fd42a15a5bdff --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-keyword-repeat-tokenfilter.md @@ -0,0 +1,370 @@ +--- +navigation_title: "Keyword repeat" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-keyword-repeat-tokenfilter.html +--- + +# Keyword repeat token filter [analysis-keyword-repeat-tokenfilter] + + +Outputs a keyword version of each token in a stream. These keyword tokens are not stemmed. + +The `keyword_repeat` filter assigns keyword tokens a `keyword` attribute of `true`. Stemmer token filters, such as [`stemmer`](/reference/data-analysis/text-analysis/analysis-stemmer-tokenfilter.md) or [`porter_stem`](/reference/data-analysis/text-analysis/analysis-porterstem-tokenfilter.md), skip tokens with a `keyword` attribute of `true`. + +You can use the `keyword_repeat` filter with a stemmer token filter to output a stemmed and unstemmed version of each token in a stream. + +::::{important} +To work properly, the `keyword_repeat` filter must be listed before any stemmer token filters in the [analyzer configuration](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +Stemming does not affect all tokens. This means streams could contain duplicate tokens in the same position, even after stemming. + +To remove these duplicate tokens, add the [`remove_duplicates`](/reference/data-analysis/text-analysis/analysis-remove-duplicates-tokenfilter.md) filter after the stemmer filter in the analyzer configuration. + +:::: + + +The `keyword_repeat` filter uses Lucene’s [KeywordRepeatFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilter.md). + +## Example [analysis-keyword-repeat-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `keyword_repeat` filter to output a keyword and non-keyword version of each token in `fox running and jumping`. 
+ +To return the `keyword` attribute for these tokens, the analyze API request also includes the following arguments: + +* `explain`: `true` +* `attributes`: `keyword` + +```console +GET /_analyze +{ + "tokenizer": "whitespace", + "filter": [ + "keyword_repeat" + ], + "text": "fox running and jumping", + "explain": true, + "attributes": "keyword" +} +``` + +The API returns the following response. Note that one version of each token has a `keyword` attribute of `true`. + +::::{dropdown} **Response** +```console-result +{ + "detail": { + "custom_analyzer": true, + "charfilters": [], + "tokenizer": ..., + "tokenfilters": [ + { + "name": "keyword_repeat", + "tokens": [ + { + "token": "fox", + "start_offset": 0, + "end_offset": 3, + "type": "word", + "position": 0, + "keyword": true + }, + { + "token": "fox", + "start_offset": 0, + "end_offset": 3, + "type": "word", + "position": 0, + "keyword": false + }, + { + "token": "running", + "start_offset": 4, + "end_offset": 11, + "type": "word", + "position": 1, + "keyword": true + }, + { + "token": "running", + "start_offset": 4, + "end_offset": 11, + "type": "word", + "position": 1, + "keyword": false + }, + { + "token": "and", + "start_offset": 12, + "end_offset": 15, + "type": "word", + "position": 2, + "keyword": true + }, + { + "token": "and", + "start_offset": 12, + "end_offset": 15, + "type": "word", + "position": 2, + "keyword": false + }, + { + "token": "jumping", + "start_offset": 16, + "end_offset": 23, + "type": "word", + "position": 3, + "keyword": true + }, + { + "token": "jumping", + "start_offset": 16, + "end_offset": 23, + "type": "word", + "position": 3, + "keyword": false + } + ] + } + ] + } +} +``` + +:::: + + +To stem the non-keyword tokens, add the `stemmer` filter after the `keyword_repeat` filter in the previous analyze API request. + +```console +GET /_analyze +{ + "tokenizer": "whitespace", + "filter": [ + "keyword_repeat", + "stemmer" + ], + "text": "fox running and jumping", + "explain": true, + "attributes": "keyword" +} +``` + +The API returns the following response. Note the following changes: + +* The non-keyword version of `running` was stemmed to `run`. +* The non-keyword version of `jumping` was stemmed to `jump`. + +::::{dropdown} **Response** +```console-result +{ + "detail": { + "custom_analyzer": true, + "charfilters": [], + "tokenizer": ..., + "tokenfilters": [ + { + "name": "keyword_repeat", + "tokens": ... 
+ }, + { + "name": "stemmer", + "tokens": [ + { + "token": "fox", + "start_offset": 0, + "end_offset": 3, + "type": "word", + "position": 0, + "keyword": true + }, + { + "token": "fox", + "start_offset": 0, + "end_offset": 3, + "type": "word", + "position": 0, + "keyword": false + }, + { + "token": "running", + "start_offset": 4, + "end_offset": 11, + "type": "word", + "position": 1, + "keyword": true + }, + { + "token": "run", + "start_offset": 4, + "end_offset": 11, + "type": "word", + "position": 1, + "keyword": false + }, + { + "token": "and", + "start_offset": 12, + "end_offset": 15, + "type": "word", + "position": 2, + "keyword": true + }, + { + "token": "and", + "start_offset": 12, + "end_offset": 15, + "type": "word", + "position": 2, + "keyword": false + }, + { + "token": "jumping", + "start_offset": 16, + "end_offset": 23, + "type": "word", + "position": 3, + "keyword": true + }, + { + "token": "jump", + "start_offset": 16, + "end_offset": 23, + "type": "word", + "position": 3, + "keyword": false + } + ] + } + ] + } +} +``` + +:::: + + +However, the keyword and non-keyword versions of `fox` and `and` are identical and in the same respective positions. + +To remove these duplicate tokens, add the `remove_duplicates` filter after `stemmer` in the analyze API request. + +```console +GET /_analyze +{ + "tokenizer": "whitespace", + "filter": [ + "keyword_repeat", + "stemmer", + "remove_duplicates" + ], + "text": "fox running and jumping", + "explain": true, + "attributes": "keyword" +} +``` + +The API returns the following response. Note that the duplicate tokens for `fox` and `and` have been removed. + +::::{dropdown} **Response** +```console-result +{ + "detail": { + "custom_analyzer": true, + "charfilters": [], + "tokenizer": ..., + "tokenfilters": [ + { + "name": "keyword_repeat", + "tokens": ... + }, + { + "name": "stemmer", + "tokens": ... + }, + { + "name": "remove_duplicates", + "tokens": [ + { + "token": "fox", + "start_offset": 0, + "end_offset": 3, + "type": "word", + "position": 0, + "keyword": true + }, + { + "token": "running", + "start_offset": 4, + "end_offset": 11, + "type": "word", + "position": 1, + "keyword": true + }, + { + "token": "run", + "start_offset": 4, + "end_offset": 11, + "type": "word", + "position": 1, + "keyword": false + }, + { + "token": "and", + "start_offset": 12, + "end_offset": 15, + "type": "word", + "position": 2, + "keyword": true + }, + { + "token": "jumping", + "start_offset": 16, + "end_offset": 23, + "type": "word", + "position": 3, + "keyword": true + }, + { + "token": "jump", + "start_offset": 16, + "end_offset": 23, + "type": "word", + "position": 3, + "keyword": false + } + ] + } + ] + } +} +``` + +:::: + + + +## Add to an analyzer [analysis-keyword-repeat-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `keyword_repeat` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +This custom analyzer uses the `keyword_repeat` and `porter_stem` filters to create a stemmed and unstemmed version of each token in a stream. The `remove_duplicates` filter then removes any duplicate tokens from the stream. 
+ +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_custom_analyzer": { + "tokenizer": "standard", + "filter": [ + "keyword_repeat", + "porter_stem", + "remove_duplicates" + ] + } + } + } + } +} +``` diff --git a/docs/reference/data-analysis/text-analysis/analysis-keyword-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-keyword-tokenizer.md new file mode 100644 index 0000000000000..b3458a2ced566 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-keyword-tokenizer.md @@ -0,0 +1,58 @@ +--- +navigation_title: "Keyword" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-keyword-tokenizer.html +--- + +# Keyword tokenizer [analysis-keyword-tokenizer] + + +The `keyword` tokenizer is a noop tokenizer that accepts whatever text it is given and outputs the exact same text as a single term. It can be combined with token filters to normalise output, e.g. lower-casing email addresses. + + +## Example output [_example_output_10] + +```console +POST _analyze +{ + "tokenizer": "keyword", + "text": "New York" +} +``` + +The above sentence would produce the following term: + +```text +[ New York ] +``` + + +## Combine with token filters [analysis-keyword-tokenizer-token-filters] + +You can combine the `keyword` tokenizer with token filters to normalise structured data, such as product IDs or email addresses. + +For example, the following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `keyword` tokenizer and [`lowercase`](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md) filter to convert an email address to lowercase. + +```console +POST _analyze +{ + "tokenizer": "keyword", + "filter": [ "lowercase" ], + "text": "john.SMITH@example.COM" +} +``` + +The request produces the following token: + +```text +[ john.smith@example.com ] +``` + + +## Configuration [_configuration_11] + +The `keyword` tokenizer accepts the following parameters: + +`buffer_size` +: The number of characters read into the term buffer in a single pass. Defaults to `256`. The term buffer will grow by this size until all the text has been consumed. It is advisable not to change this setting. + diff --git a/docs/reference/data-analysis/text-analysis/analysis-kstem-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-kstem-tokenfilter.md new file mode 100644 index 0000000000000..bf352093e53fa --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-kstem-tokenfilter.md @@ -0,0 +1,67 @@ +--- +navigation_title: "KStem" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-kstem-tokenfilter.html +--- + +# KStem token filter [analysis-kstem-tokenfilter] + + +Provides [KStem](https://ciir.cs.umass.edu/pubfiles/ir-35.pdf)-based stemming for the English language. The `kstem` filter combines [algorithmic stemming](docs-content://manage-data/data-store/text-analysis/stemming.md#algorithmic-stemmers) with a built-in [dictionary](docs-content://manage-data/data-store/text-analysis/stemming.md#dictionary-stemmers). + +The `kstem` filter tends to stem less aggressively than other English stemmer filters, such as the [`porter_stem`](/reference/data-analysis/text-analysis/analysis-porterstem-tokenfilter.md) filter. 
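+
+For a rough comparison, you can run the same text through the more aggressive [`porter_stem`](/reference/data-analysis/text-analysis/analysis-porterstem-tokenfilter.md) filter with the [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze); a minimal sketch, where Porter stemming typically reduces `quickly` to `quickli` while `kstem` produces `quick`:
+
+```console
+GET /_analyze
+{
+  "tokenizer": "standard",
+  "filter": [ "porter_stem" ],
+  "text": "the foxes jumping quickly"
+}
+```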
+ +The `kstem` filter is equivalent to the [`stemmer`](/reference/data-analysis/text-analysis/analysis-stemmer-tokenfilter.md) filter’s [`light_english`](/reference/data-analysis/text-analysis/analysis-stemmer-tokenfilter.md#analysis-stemmer-tokenfilter-language-parm) variant. + +This filter uses Lucene’s [KStemFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/en/KStemFilter.md). + +## Example [analysis-kstem-tokenfilter-analyze-ex] + +The following analyze API request uses the `kstem` filter to stem `the foxes jumping quickly` to `the fox jump quick`: + +```console +GET /_analyze +{ + "tokenizer": "standard", + "filter": [ "kstem" ], + "text": "the foxes jumping quickly" +} +``` + +The filter produces the following tokens: + +```text +[ the, fox, jump, quick ] +``` + + +## Add to an analyzer [analysis-kstem-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `kstem` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +::::{important} +To work properly, the `kstem` filter requires lowercase tokens. To ensure tokens are lowercased, add the [`lowercase`](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md) filter before the `kstem` filter in the analyzer configuration. + +:::: + + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "whitespace", + "filter": [ + "lowercase", + "kstem" + ] + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-lang-analyzer.md b/docs/reference/data-analysis/text-analysis/analysis-lang-analyzer.md new file mode 100644 index 0000000000000..3e1c651805a73 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-lang-analyzer.md @@ -0,0 +1,1692 @@ +--- +navigation_title: "Language" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-lang-analyzer.html +--- + +# Language analyzers [analysis-lang-analyzer] + + +A set of analyzers aimed at analyzing specific language text. The following types are supported: [`arabic`](#arabic-analyzer), [`armenian`](#armenian-analyzer), [`basque`](#basque-analyzer), [`bengali`](#bengali-analyzer), [`brazilian`](#brazilian-analyzer), [`bulgarian`](#bulgarian-analyzer), [`catalan`](#catalan-analyzer), [`cjk`](#cjk-analyzer), [`czech`](#czech-analyzer), [`danish`](#danish-analyzer), [`dutch`](#dutch-analyzer), [`english`](#english-analyzer), [`estonian`](#estonian-analyzer), [`finnish`](#finnish-analyzer), [`french`](#french-analyzer), [`galician`](#galician-analyzer), [`german`](#german-analyzer), [`greek`](#greek-analyzer), [`hindi`](#hindi-analyzer), [`hungarian`](#hungarian-analyzer), [`indonesian`](#indonesian-analyzer), [`irish`](#irish-analyzer), [`italian`](#italian-analyzer), [`latvian`](#latvian-analyzer), [`lithuanian`](#lithuanian-analyzer), [`norwegian`](#norwegian-analyzer), [`persian`](#persian-analyzer), [`portuguese`](#portuguese-analyzer), [`romanian`](#romanian-analyzer), [`russian`](#russian-analyzer), [`serbian`](#serbian-analyzer), [`sorani`](#sorani-analyzer), [`spanish`](#spanish-analyzer), [`swedish`](#swedish-analyzer), [`turkish`](#turkish-analyzer), [`thai`](#thai-analyzer). 
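+
+As a minimal usage sketch (the index and field names here are only placeholders), you select one of these analyzers by name in a field mapping:
+
+```console
+PUT /my-index-000001
+{
+  "mappings": {
+    "properties": {
+      "title": {
+        "type": "text",
+        "analyzer": "english"
+      }
+    }
+  }
+}
+```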
+ +## Configuring language analyzers [_configuring_language_analyzers] + +### Stopwords [_stopwords] + +All analyzers support setting custom `stopwords` either internally in the config, or by using an external stopwords file by setting `stopwords_path`. Check [Stop Analyzer](/reference/data-analysis/text-analysis/analysis-stop-analyzer.md) for more details. + + +### Excluding words from stemming [_excluding_words_from_stemming] + +The `stem_exclusion` parameter allows you to specify an array of lowercase words that should not be stemmed. Internally, this functionality is implemented by adding the [`keyword_marker` token filter](/reference/data-analysis/text-analysis/analysis-keyword-marker-tokenfilter.md) with the `keywords` set to the value of the `stem_exclusion` parameter. + +The following analyzers support setting custom `stem_exclusion` list: `arabic`, `armenian`, `basque`, `bengali`, `bulgarian`, `catalan`, `czech`, `dutch`, `english`, `finnish`, `french`, `galician`, `german`, `hindi`, `hungarian`, `indonesian`, `irish`, `italian`, `latvian`, `lithuanian`, `norwegian`, `portuguese`, `romanian`, `russian`, `serbian`, `sorani`, `spanish`, `swedish`, `turkish`. + + + +## Reimplementing language analyzers [_reimplementing_language_analyzers] + +The built-in language analyzers can be reimplemented as `custom` analyzers (as described below) in order to customize their behaviour. + +::::{note} +If you do not intend to exclude words from being stemmed (the equivalent of the `stem_exclusion` parameter above), then you should remove the `keyword_marker` token filter from the custom analyzer configuration. +:::: + + +### `arabic` analyzer [arabic-analyzer] + +The `arabic` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /arabic_example +{ + "settings": { + "analysis": { + "filter": { + "arabic_stop": { + "type": "stop", + "stopwords": "_arabic_" <1> + }, + "arabic_keywords": { + "type": "keyword_marker", + "keywords": ["مثال"] <2> + }, + "arabic_stemmer": { + "type": "stemmer", + "language": "arabic" + } + }, + "analyzer": { + "rebuilt_arabic": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "decimal_digit", + "arabic_stop", + "arabic_normalization", + "arabic_keywords", + "arabic_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `armenian` analyzer [armenian-analyzer] + +The `armenian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /armenian_example +{ + "settings": { + "analysis": { + "filter": { + "armenian_stop": { + "type": "stop", + "stopwords": "_armenian_" <1> + }, + "armenian_keywords": { + "type": "keyword_marker", + "keywords": ["օրինակ"] <2> + }, + "armenian_stemmer": { + "type": "stemmer", + "language": "armenian" + } + }, + "analyzer": { + "rebuilt_armenian": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "armenian_stop", + "armenian_keywords", + "armenian_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. 
+ + + +### `basque` analyzer [basque-analyzer] + +The `basque` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /basque_example +{ + "settings": { + "analysis": { + "filter": { + "basque_stop": { + "type": "stop", + "stopwords": "_basque_" <1> + }, + "basque_keywords": { + "type": "keyword_marker", + "keywords": ["Adibidez"] <2> + }, + "basque_stemmer": { + "type": "stemmer", + "language": "basque" + } + }, + "analyzer": { + "rebuilt_basque": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "basque_stop", + "basque_keywords", + "basque_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `bengali` analyzer [bengali-analyzer] + +The `bengali` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /bengali_example +{ + "settings": { + "analysis": { + "filter": { + "bengali_stop": { + "type": "stop", + "stopwords": "_bengali_" <1> + }, + "bengali_keywords": { + "type": "keyword_marker", + "keywords": ["উদাহরণ"] <2> + }, + "bengali_stemmer": { + "type": "stemmer", + "language": "bengali" + } + }, + "analyzer": { + "rebuilt_bengali": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "decimal_digit", + "bengali_keywords", + "indic_normalization", + "bengali_normalization", + "bengali_stop", + "bengali_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `brazilian` analyzer [brazilian-analyzer] + +The `brazilian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /brazilian_example +{ + "settings": { + "analysis": { + "filter": { + "brazilian_stop": { + "type": "stop", + "stopwords": "_brazilian_" <1> + }, + "brazilian_keywords": { + "type": "keyword_marker", + "keywords": ["exemplo"] <2> + }, + "brazilian_stemmer": { + "type": "stemmer", + "language": "brazilian" + } + }, + "analyzer": { + "rebuilt_brazilian": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "brazilian_stop", + "brazilian_keywords", + "brazilian_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `bulgarian` analyzer [bulgarian-analyzer] + +The `bulgarian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /bulgarian_example +{ + "settings": { + "analysis": { + "filter": { + "bulgarian_stop": { + "type": "stop", + "stopwords": "_bulgarian_" <1> + }, + "bulgarian_keywords": { + "type": "keyword_marker", + "keywords": ["пример"] <2> + }, + "bulgarian_stemmer": { + "type": "stemmer", + "language": "bulgarian" + } + }, + "analyzer": { + "rebuilt_bulgarian": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "bulgarian_stop", + "bulgarian_keywords", + "bulgarian_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. 
+ + + +### `catalan` analyzer [catalan-analyzer] + +The `catalan` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /catalan_example +{ + "settings": { + "analysis": { + "filter": { + "catalan_elision": { + "type": "elision", + "articles": [ "d", "l", "m", "n", "s", "t"], + "articles_case": true + }, + "catalan_stop": { + "type": "stop", + "stopwords": "_catalan_" <1> + }, + "catalan_keywords": { + "type": "keyword_marker", + "keywords": ["example"] <2> + }, + "catalan_stemmer": { + "type": "stemmer", + "language": "catalan" + } + }, + "analyzer": { + "rebuilt_catalan": { + "tokenizer": "standard", + "filter": [ + "catalan_elision", + "lowercase", + "catalan_stop", + "catalan_keywords", + "catalan_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `cjk` analyzer [cjk-analyzer] + +::::{note} +You may find that `icu_analyzer` in the ICU analysis plugin works better for CJK text than the `cjk` analyzer. Experiment with your text and queries. +:::: + + +The `cjk` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /cjk_example +{ + "settings": { + "analysis": { + "filter": { + "english_stop": { + "type": "stop", + "stopwords": [ <1> + "a", "and", "are", "as", "at", "be", "but", "by", "for", + "if", "in", "into", "is", "it", "no", "not", "of", "on", + "or", "s", "such", "t", "that", "the", "their", "then", + "there", "these", "they", "this", "to", "was", "will", + "with", "www" + ] + } + }, + "analyzer": { + "rebuilt_cjk": { + "tokenizer": "standard", + "filter": [ + "cjk_width", + "lowercase", + "cjk_bigram", + "english_stop" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. The default stop words are **almost** the same as the `_english_` set, but not exactly the same. + + + +### `czech` analyzer [czech-analyzer] + +The `czech` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /czech_example +{ + "settings": { + "analysis": { + "filter": { + "czech_stop": { + "type": "stop", + "stopwords": "_czech_" <1> + }, + "czech_keywords": { + "type": "keyword_marker", + "keywords": ["příklad"] <2> + }, + "czech_stemmer": { + "type": "stemmer", + "language": "czech" + } + }, + "analyzer": { + "rebuilt_czech": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "czech_stop", + "czech_keywords", + "czech_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `danish` analyzer [danish-analyzer] + +The `danish` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /danish_example +{ + "settings": { + "analysis": { + "filter": { + "danish_stop": { + "type": "stop", + "stopwords": "_danish_" <1> + }, + "danish_keywords": { + "type": "keyword_marker", + "keywords": ["eksempel"] <2> + }, + "danish_stemmer": { + "type": "stemmer", + "language": "danish" + } + }, + "analyzer": { + "rebuilt_danish": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "danish_stop", + "danish_keywords", + "danish_stemmer" + ] + } + } + } + } +} +``` + +1. 
The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `dutch` analyzer [dutch-analyzer] + +The `dutch` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /dutch_example +{ + "settings": { + "analysis": { + "filter": { + "dutch_stop": { + "type": "stop", + "stopwords": "_dutch_" <1> + }, + "dutch_keywords": { + "type": "keyword_marker", + "keywords": ["voorbeeld"] <2> + }, + "dutch_stemmer": { + "type": "stemmer", + "language": "dutch" + }, + "dutch_override": { + "type": "stemmer_override", + "rules": [ + "fiets=>fiets", + "bromfiets=>bromfiets", + "ei=>eier", + "kind=>kinder" + ] + } + }, + "analyzer": { + "rebuilt_dutch": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "dutch_stop", + "dutch_keywords", + "dutch_override", + "dutch_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `english` analyzer [english-analyzer] + +The `english` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /english_example +{ + "settings": { + "analysis": { + "filter": { + "english_stop": { + "type": "stop", + "stopwords": "_english_" <1> + }, + "english_keywords": { + "type": "keyword_marker", + "keywords": ["example"] <2> + }, + "english_stemmer": { + "type": "stemmer", + "language": "english" + }, + "english_possessive_stemmer": { + "type": "stemmer", + "language": "possessive_english" + } + }, + "analyzer": { + "rebuilt_english": { + "tokenizer": "standard", + "filter": [ + "english_possessive_stemmer", + "lowercase", + "english_stop", + "english_keywords", + "english_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `estonian` analyzer [estonian-analyzer] + +The `estonian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /estonian_example +{ + "settings": { + "analysis": { + "filter": { + "estonian_stop": { + "type": "stop", + "stopwords": "_estonian_" <1> + }, + "estonian_keywords": { + "type": "keyword_marker", + "keywords": ["näide"] <2> + }, + "estonian_stemmer": { + "type": "stemmer", + "language": "estonian" + } + }, + "analyzer": { + "rebuilt_estonian": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "estonian_stop", + "estonian_keywords", + "estonian_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. 
+ + + +### `finnish` analyzer [finnish-analyzer] + +The `finnish` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /finnish_example +{ + "settings": { + "analysis": { + "filter": { + "finnish_stop": { + "type": "stop", + "stopwords": "_finnish_" <1> + }, + "finnish_keywords": { + "type": "keyword_marker", + "keywords": ["esimerkki"] <2> + }, + "finnish_stemmer": { + "type": "stemmer", + "language": "finnish" + } + }, + "analyzer": { + "rebuilt_finnish": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "finnish_stop", + "finnish_keywords", + "finnish_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `french` analyzer [french-analyzer] + +The `french` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /french_example +{ + "settings": { + "analysis": { + "filter": { + "french_elision": { + "type": "elision", + "articles_case": true, + "articles": [ + "l", "m", "t", "qu", "n", "s", + "j", "d", "c", "jusqu", "quoiqu", + "lorsqu", "puisqu" + ] + }, + "french_stop": { + "type": "stop", + "stopwords": "_french_" <1> + }, + "french_keywords": { + "type": "keyword_marker", + "keywords": ["Example"] <2> + }, + "french_stemmer": { + "type": "stemmer", + "language": "light_french" + } + }, + "analyzer": { + "rebuilt_french": { + "tokenizer": "standard", + "filter": [ + "french_elision", + "lowercase", + "french_stop", + "french_keywords", + "french_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `galician` analyzer [galician-analyzer] + +The `galician` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /galician_example +{ + "settings": { + "analysis": { + "filter": { + "galician_stop": { + "type": "stop", + "stopwords": "_galician_" <1> + }, + "galician_keywords": { + "type": "keyword_marker", + "keywords": ["exemplo"] <2> + }, + "galician_stemmer": { + "type": "stemmer", + "language": "galician" + } + }, + "analyzer": { + "rebuilt_galician": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "galician_stop", + "galician_keywords", + "galician_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `german` analyzer [german-analyzer] + +The `german` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /german_example +{ + "settings": { + "analysis": { + "filter": { + "german_stop": { + "type": "stop", + "stopwords": "_german_" <1> + }, + "german_keywords": { + "type": "keyword_marker", + "keywords": ["Beispiel"] <2> + }, + "german_stemmer": { + "type": "stemmer", + "language": "light_german" + } + }, + "analyzer": { + "rebuilt_german": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "german_stop", + "german_keywords", + "german_normalization", + "german_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. 
This filter should be removed unless there are words which should be excluded from stemming. + + + +### `greek` analyzer [greek-analyzer] + +The `greek` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /greek_example +{ + "settings": { + "analysis": { + "filter": { + "greek_stop": { + "type": "stop", + "stopwords": "_greek_" <1> + }, + "greek_lowercase": { + "type": "lowercase", + "language": "greek" + }, + "greek_keywords": { + "type": "keyword_marker", + "keywords": ["παράδειγμα"] <2> + }, + "greek_stemmer": { + "type": "stemmer", + "language": "greek" + } + }, + "analyzer": { + "rebuilt_greek": { + "tokenizer": "standard", + "filter": [ + "greek_lowercase", + "greek_stop", + "greek_keywords", + "greek_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `hindi` analyzer [hindi-analyzer] + +The `hindi` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /hindi_example +{ + "settings": { + "analysis": { + "filter": { + "hindi_stop": { + "type": "stop", + "stopwords": "_hindi_" <1> + }, + "hindi_keywords": { + "type": "keyword_marker", + "keywords": ["उदाहरण"] <2> + }, + "hindi_stemmer": { + "type": "stemmer", + "language": "hindi" + } + }, + "analyzer": { + "rebuilt_hindi": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "decimal_digit", + "hindi_keywords", + "indic_normalization", + "hindi_normalization", + "hindi_stop", + "hindi_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `hungarian` analyzer [hungarian-analyzer] + +The `hungarian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /hungarian_example +{ + "settings": { + "analysis": { + "filter": { + "hungarian_stop": { + "type": "stop", + "stopwords": "_hungarian_" <1> + }, + "hungarian_keywords": { + "type": "keyword_marker", + "keywords": ["példa"] <2> + }, + "hungarian_stemmer": { + "type": "stemmer", + "language": "hungarian" + } + }, + "analyzer": { + "rebuilt_hungarian": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "hungarian_stop", + "hungarian_keywords", + "hungarian_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `indonesian` analyzer [indonesian-analyzer] + +The `indonesian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /indonesian_example +{ + "settings": { + "analysis": { + "filter": { + "indonesian_stop": { + "type": "stop", + "stopwords": "_indonesian_" <1> + }, + "indonesian_keywords": { + "type": "keyword_marker", + "keywords": ["contoh"] <2> + }, + "indonesian_stemmer": { + "type": "stemmer", + "language": "indonesian" + } + }, + "analyzer": { + "rebuilt_indonesian": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "indonesian_stop", + "indonesian_keywords", + "indonesian_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. 
This filter should be removed unless there are words which should be excluded from stemming. + + + +### `irish` analyzer [irish-analyzer] + +The `irish` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /irish_example +{ + "settings": { + "analysis": { + "filter": { + "irish_hyphenation": { + "type": "stop", + "stopwords": [ "h", "n", "t" ], + "ignore_case": true + }, + "irish_elision": { + "type": "elision", + "articles": [ "d", "m", "b" ], + "articles_case": true + }, + "irish_stop": { + "type": "stop", + "stopwords": "_irish_" <1> + }, + "irish_lowercase": { + "type": "lowercase", + "language": "irish" + }, + "irish_keywords": { + "type": "keyword_marker", + "keywords": ["sampla"] <2> + }, + "irish_stemmer": { + "type": "stemmer", + "language": "irish" + } + }, + "analyzer": { + "rebuilt_irish": { + "tokenizer": "standard", + "filter": [ + "irish_hyphenation", + "irish_elision", + "irish_lowercase", + "irish_stop", + "irish_keywords", + "irish_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `italian` analyzer [italian-analyzer] + +The `italian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /italian_example +{ + "settings": { + "analysis": { + "filter": { + "italian_elision": { + "type": "elision", + "articles": [ + "c", "l", "all", "dall", "dell", + "nell", "sull", "coll", "pell", + "gl", "agl", "dagl", "degl", "negl", + "sugl", "un", "m", "t", "s", "v", "d" + ], + "articles_case": true + }, + "italian_stop": { + "type": "stop", + "stopwords": "_italian_" <1> + }, + "italian_keywords": { + "type": "keyword_marker", + "keywords": ["esempio"] <2> + }, + "italian_stemmer": { + "type": "stemmer", + "language": "light_italian" + } + }, + "analyzer": { + "rebuilt_italian": { + "tokenizer": "standard", + "filter": [ + "italian_elision", + "lowercase", + "italian_stop", + "italian_keywords", + "italian_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `latvian` analyzer [latvian-analyzer] + +The `latvian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /latvian_example +{ + "settings": { + "analysis": { + "filter": { + "latvian_stop": { + "type": "stop", + "stopwords": "_latvian_" <1> + }, + "latvian_keywords": { + "type": "keyword_marker", + "keywords": ["piemērs"] <2> + }, + "latvian_stemmer": { + "type": "stemmer", + "language": "latvian" + } + }, + "analyzer": { + "rebuilt_latvian": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "latvian_stop", + "latvian_keywords", + "latvian_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. 
+ + + +### `lithuanian` analyzer [lithuanian-analyzer] + +The `lithuanian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /lithuanian_example +{ + "settings": { + "analysis": { + "filter": { + "lithuanian_stop": { + "type": "stop", + "stopwords": "_lithuanian_" <1> + }, + "lithuanian_keywords": { + "type": "keyword_marker", + "keywords": ["pavyzdys"] <2> + }, + "lithuanian_stemmer": { + "type": "stemmer", + "language": "lithuanian" + } + }, + "analyzer": { + "rebuilt_lithuanian": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "lithuanian_stop", + "lithuanian_keywords", + "lithuanian_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `norwegian` analyzer [norwegian-analyzer] + +The `norwegian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /norwegian_example +{ + "settings": { + "analysis": { + "filter": { + "norwegian_stop": { + "type": "stop", + "stopwords": "_norwegian_" <1> + }, + "norwegian_keywords": { + "type": "keyword_marker", + "keywords": ["eksempel"] <2> + }, + "norwegian_stemmer": { + "type": "stemmer", + "language": "norwegian" + } + }, + "analyzer": { + "rebuilt_norwegian": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "norwegian_stop", + "norwegian_keywords", + "norwegian_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `persian` analyzer [persian-analyzer] + +The `persian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /persian_example +{ + "settings": { + "analysis": { + "char_filter": { + "zero_width_spaces": { + "type": "mapping", + "mappings": [ "\\u200C=>\\u0020"] <1> + } + }, + "filter": { + "persian_stop": { + "type": "stop", + "stopwords": "_persian_" <2> + } + }, + "analyzer": { + "rebuilt_persian": { + "tokenizer": "standard", + "char_filter": [ "zero_width_spaces" ], + "filter": [ + "lowercase", + "decimal_digit", + "arabic_normalization", + "persian_normalization", + "persian_stop", + "persian_stem" + ] + } + } + } + } +} +``` + +1. Replaces zero-width non-joiners with an ASCII space. +2. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. + + + +### `portuguese` analyzer [portuguese-analyzer] + +The `portuguese` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /portuguese_example +{ + "settings": { + "analysis": { + "filter": { + "portuguese_stop": { + "type": "stop", + "stopwords": "_portuguese_" <1> + }, + "portuguese_keywords": { + "type": "keyword_marker", + "keywords": ["exemplo"] <2> + }, + "portuguese_stemmer": { + "type": "stemmer", + "language": "light_portuguese" + } + }, + "analyzer": { + "rebuilt_portuguese": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "portuguese_stop", + "portuguese_keywords", + "portuguese_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. 
+ + + +### `romanian` analyzer [romanian-analyzer] + +The `romanian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /romanian_example +{ + "settings": { + "analysis": { + "filter": { + "romanian_stop": { + "type": "stop", + "stopwords": "_romanian_" <1> + }, + "romanian_keywords": { + "type": "keyword_marker", + "keywords": ["exemplu"] <2> + }, + "romanian_stemmer": { + "type": "stemmer", + "language": "romanian" + } + }, + "analyzer": { + "rebuilt_romanian": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "romanian_stop", + "romanian_keywords", + "romanian_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `russian` analyzer [russian-analyzer] + +The `russian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /russian_example +{ + "settings": { + "analysis": { + "filter": { + "russian_stop": { + "type": "stop", + "stopwords": "_russian_" <1> + }, + "russian_keywords": { + "type": "keyword_marker", + "keywords": ["пример"] <2> + }, + "russian_stemmer": { + "type": "stemmer", + "language": "russian" + } + }, + "analyzer": { + "rebuilt_russian": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "russian_stop", + "russian_keywords", + "russian_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `serbian` analyzer [serbian-analyzer] + +The `serbian` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /serbian_example +{ + "settings": { + "analysis": { + "filter": { + "serbian_stop": { + "type": "stop", + "stopwords": "_serbian_" <1> + }, + "serbian_keywords": { + "type": "keyword_marker", + "keywords": ["пример"] <2> + }, + "serbian_stemmer": { + "type": "stemmer", + "language": "serbian" + } + }, + "analyzer": { + "rebuilt_serbian": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "serbian_stop", + "serbian_keywords", + "serbian_stemmer", + "serbian_normalization" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `sorani` analyzer [sorani-analyzer] + +The `sorani` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /sorani_example +{ + "settings": { + "analysis": { + "filter": { + "sorani_stop": { + "type": "stop", + "stopwords": "_sorani_" <1> + }, + "sorani_keywords": { + "type": "keyword_marker", + "keywords": ["mînak"] <2> + }, + "sorani_stemmer": { + "type": "stemmer", + "language": "sorani" + } + }, + "analyzer": { + "rebuilt_sorani": { + "tokenizer": "standard", + "filter": [ + "sorani_normalization", + "lowercase", + "decimal_digit", + "sorani_stop", + "sorani_keywords", + "sorani_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. 
+ + + +### `spanish` analyzer [spanish-analyzer] + +The `spanish` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /spanish_example +{ + "settings": { + "analysis": { + "filter": { + "spanish_stop": { + "type": "stop", + "stopwords": "_spanish_" <1> + }, + "spanish_keywords": { + "type": "keyword_marker", + "keywords": ["ejemplo"] <2> + }, + "spanish_stemmer": { + "type": "stemmer", + "language": "light_spanish" + } + }, + "analyzer": { + "rebuilt_spanish": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "spanish_stop", + "spanish_keywords", + "spanish_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `swedish` analyzer [swedish-analyzer] + +The `swedish` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /swedish_example +{ + "settings": { + "analysis": { + "filter": { + "swedish_stop": { + "type": "stop", + "stopwords": "_swedish_" <1> + }, + "swedish_keywords": { + "type": "keyword_marker", + "keywords": ["exempel"] <2> + }, + "swedish_stemmer": { + "type": "stemmer", + "language": "swedish" + } + }, + "analyzer": { + "rebuilt_swedish": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "swedish_stop", + "swedish_keywords", + "swedish_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `turkish` analyzer [turkish-analyzer] + +The `turkish` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /turkish_example +{ + "settings": { + "analysis": { + "filter": { + "turkish_stop": { + "type": "stop", + "stopwords": "_turkish_" <1> + }, + "turkish_lowercase": { + "type": "lowercase", + "language": "turkish" + }, + "turkish_keywords": { + "type": "keyword_marker", + "keywords": ["örnek"] <2> + }, + "turkish_stemmer": { + "type": "stemmer", + "language": "turkish" + } + }, + "analyzer": { + "rebuilt_turkish": { + "tokenizer": "standard", + "filter": [ + "apostrophe", + "turkish_lowercase", + "turkish_stop", + "turkish_keywords", + "turkish_stemmer" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. This filter should be removed unless there are words which should be excluded from stemming. + + + +### `thai` analyzer [thai-analyzer] + +The `thai` analyzer could be reimplemented as a `custom` analyzer as follows: + +```console +PUT /thai_example +{ + "settings": { + "analysis": { + "filter": { + "thai_stop": { + "type": "stop", + "stopwords": "_thai_" <1> + } + }, + "analyzer": { + "rebuilt_thai": { + "tokenizer": "thai", + "filter": [ + "lowercase", + "decimal_digit", + "thai_stop" + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. 
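+
+To check that a rebuilt analyzer behaves like its built-in counterpart, you can run the same text through both with the `_analyze` API. The requests below are a quick check added to this page for illustration, using the `thai_example` index created above; the Thai sample text is arbitrary:
+
+```console
+POST /thai_example/_analyze
+{
+  "analyzer": "rebuilt_thai",
+  "text": "ตัวอย่างข้อความภาษาไทย"
+}
+
+POST /thai_example/_analyze
+{
+  "analyzer": "thai",
+  "text": "ตัวอย่างข้อความภาษาไทย"
+}
+```
+
+If the rebuilt analyzer is equivalent, both responses should contain the same tokens.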
+ + + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-length-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-length-tokenfilter.md new file mode 100644 index 0000000000000..40f0171de06d7 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-length-tokenfilter.md @@ -0,0 +1,105 @@ +--- +navigation_title: "Length" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-length-tokenfilter.html +--- + +# Length token filter [analysis-length-tokenfilter] + + +Removes tokens shorter or longer than specified character lengths. For example, you can use the `length` filter to exclude tokens shorter than 2 characters and tokens longer than 5 characters. + +This filter uses Lucene’s [LengthFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/LengthFilter.md). + +::::{tip} +The `length` filter removes entire tokens. If you’d prefer to shorten tokens to a specific length, use the [`truncate`](/reference/data-analysis/text-analysis/analysis-truncate-tokenfilter.md) filter. + +:::: + + +## Example [analysis-length-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `length` filter to remove tokens longer than 4 characters: + +```console +GET _analyze +{ + "tokenizer": "whitespace", + "filter": [ + { + "type": "length", + "min": 0, + "max": 4 + } + ], + "text": "the quick brown fox jumps over the lazy dog" +} +``` + +The filter produces the following tokens: + +```text +[ the, fox, over, the, lazy, dog ] +``` + + +## Add to an analyzer [analysis-length-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `length` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT length_example +{ + "settings": { + "analysis": { + "analyzer": { + "standard_length": { + "tokenizer": "standard", + "filter": [ "length" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-length-tokenfilter-configure-parms] + +`min` +: (Optional, integer) Minimum character length of a token. Shorter tokens are excluded from the output. Defaults to `0`. + +`max` +: (Optional, integer) Maximum character length of a token. Longer tokens are excluded from the output. Defaults to `Integer.MAX_VALUE`, which is `2^31-1` or `2147483647`. + + +## Customize [analysis-length-tokenfilter-customize] + +To customize the `length` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. 
+
+For example, the following request creates a custom `length` filter that removes tokens shorter than 2 characters and tokens longer than 10 characters:
+
+```console
+PUT length_custom_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "whitespace_length_2_to_10_char": {
+          "tokenizer": "whitespace",
+          "filter": [ "length_2_to_10_char" ]
+        }
+      },
+      "filter": {
+        "length_2_to_10_char": {
+          "type": "length",
+          "min": 2,
+          "max": 10
+        }
+      }
+    }
+  }
+}
+```
+
+
diff --git a/docs/reference/data-analysis/text-analysis/analysis-letter-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-letter-tokenizer.md
new file mode 100644
index 0000000000000..ef14b01e9169b
--- /dev/null
+++ b/docs/reference/data-analysis/text-analysis/analysis-letter-tokenizer.md
@@ -0,0 +1,33 @@
+---
+navigation_title: "Letter"
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-letter-tokenizer.html
+---
+
+# Letter tokenizer [analysis-letter-tokenizer]
+
+
+The `letter` tokenizer breaks text into terms whenever it encounters a character which is not a letter. It does a reasonable job for most European languages, but does a terrible job for some Asian languages, where words are not separated by spaces.
+
+
+## Example output [_example_output_11]
+
+```console
+POST _analyze
+{
+  "tokenizer": "letter",
+  "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
+}
+```
+
+The above sentence would produce the following terms:
+
+```text
+[ The, QUICK, Brown, Foxes, jumped, over, the, lazy, dog, s, bone ]
+```
+
+
+## Configuration [_configuration_12]
+
+The `letter` tokenizer is not configurable.
+
diff --git a/docs/reference/data-analysis/text-analysis/analysis-limit-token-count-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-limit-token-count-tokenfilter.md
new file mode 100644
index 0000000000000..35a5ed60cce6a
--- /dev/null
+++ b/docs/reference/data-analysis/text-analysis/analysis-limit-token-count-tokenfilter.md
@@ -0,0 +1,107 @@
+---
+navigation_title: "Limit token count"
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-limit-token-count-tokenfilter.html
+---
+
+# Limit token count token filter [analysis-limit-token-count-tokenfilter]
+
+
+Limits the number of output tokens. The `limit` filter is commonly used to limit the size of document field values based on token count.
+
+By default, the `limit` filter keeps only the first token in a stream. For example, the filter can change the token stream `[ one, two, three ]` to `[ one ]`.
+
+This filter uses Lucene’s [LimitTokenCountFilter](https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.html).
+
+::::{tip}
+If you want to limit the size of field values based on _character length_, use the `ignore_above` mapping parameter.
+::::
+
+
+## Configurable parameters [analysis-limit-token-count-tokenfilter-configure-parms]
+
+`max_token_count`
+: (Optional, integer) Maximum number of tokens to keep. Once this limit is reached, any remaining tokens are excluded from the output. Defaults to `1`.
+
+`consume_all_tokens`
+: (Optional, Boolean) If `true`, the `limit` filter exhausts the token stream, even if the `max_token_count` has already been reached. Defaults to `false`.
+ + +## Example [analysis-limit-token-count-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `limit` filter to keep only the first two tokens in `quick fox jumps over lazy dog`: + +```console +GET _analyze +{ + "tokenizer": "standard", + "filter": [ + { + "type": "limit", + "max_token_count": 2 + } + ], + "text": "quick fox jumps over lazy dog" +} +``` + +The filter produces the following tokens: + +```text +[ quick, fox ] +``` + + +## Add to an analyzer [analysis-limit-token-count-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `limit` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT limit_example +{ + "settings": { + "analysis": { + "analyzer": { + "standard_one_token_limit": { + "tokenizer": "standard", + "filter": [ "limit" ] + } + } + } + } +} +``` + + +## Customize [analysis-limit-token-count-tokenfilter-customize] + +To customize the `limit` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following request creates a custom `limit` filter that keeps only the first five tokens of a stream: + +```console +PUT custom_limit_example +{ + "settings": { + "analysis": { + "analyzer": { + "whitespace_five_token_limit": { + "tokenizer": "whitespace", + "filter": [ "five_token_limit" ] + } + }, + "filter": { + "five_token_limit": { + "type": "limit", + "max_token_count": 5 + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md new file mode 100644 index 0000000000000..8e41ebf56e73d --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md @@ -0,0 +1,102 @@ +--- +navigation_title: "Lowercase" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-lowercase-tokenfilter.html +--- + +# Lowercase token filter [analysis-lowercase-tokenfilter] + + +Changes token text to lowercase. For example, you can use the `lowercase` filter to change `THE Lazy DoG` to `the lazy dog`. + +In addition to a default filter, the `lowercase` token filter provides access to Lucene’s language-specific lowercase filters for Greek, Irish, and Turkish. + +## Example [analysis-lowercase-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the default `lowercase` filter to change the `THE Quick FoX JUMPs` to lowercase: + +```console +GET _analyze +{ + "tokenizer" : "standard", + "filter" : ["lowercase"], + "text" : "THE Quick FoX JUMPs" +} +``` + +The filter produces the following tokens: + +```text +[ the, quick, fox, jumps ] +``` + + +## Add to an analyzer [analysis-lowercase-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `lowercase` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). 
+ +```console +PUT lowercase_example +{ + "settings": { + "analysis": { + "analyzer": { + "whitespace_lowercase": { + "tokenizer": "whitespace", + "filter": [ "lowercase" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-lowercase-tokenfilter-configure-parms] + +`language` +: (Optional, string) Language-specific lowercase token filter to use. Valid values include: + +`greek` +: Uses Lucene’s [GreekLowerCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/el/GreekLowerCaseFilter.md) + +`irish` +: Uses Lucene’s [IrishLowerCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.md) + +`turkish` +: Uses Lucene’s [TurkishLowerCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.md) + +If not specified, defaults to Lucene’s [LowerCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/core/LowerCaseFilter.md). + + + +## Customize [analysis-lowercase-tokenfilter-customize] + +To customize the `lowercase` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following request creates a custom `lowercase` filter for the Greek language: + +```console +PUT custom_lowercase_example +{ + "settings": { + "analysis": { + "analyzer": { + "greek_lowercase_example": { + "type": "custom", + "tokenizer": "standard", + "filter": ["greek_lowercase"] + } + }, + "filter": { + "greek_lowercase": { + "type": "lowercase", + "language": "greek" + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-lowercase-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-lowercase-tokenizer.md new file mode 100644 index 0000000000000..6d3bb6a082c31 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-lowercase-tokenizer.md @@ -0,0 +1,33 @@ +--- +navigation_title: "Lowercase" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-lowercase-tokenizer.html +--- + +# Lowercase tokenizer [analysis-lowercase-tokenizer] + + +The `lowercase` tokenizer, like the [`letter` tokenizer](/reference/data-analysis/text-analysis/analysis-letter-tokenizer.md) breaks text into terms whenever it encounters a character which is not a letter, but it also lowercases all terms. It is functionally equivalent to the [`letter` tokenizer](/reference/data-analysis/text-analysis/analysis-letter-tokenizer.md) combined with the [`lowercase` token filter](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md), but is more efficient as it performs both steps in a single pass. + + +## Example output [_example_output_12] + +```console +POST _analyze +{ + "tokenizer": "lowercase", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The above sentence would produce the following terms: + +```text +[ the, quick, brown, foxes, jumped, over, the, lazy, dog, s, bone ] +``` + + +## Configuration [_configuration_13] + +The `lowercase` tokenizer is not configurable. 
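+
+As a quick way to see the equivalence described above (an added illustration, not part of the original page), the same sentence can be run through the `lowercase` tokenizer and through the `letter` tokenizer combined with the `lowercase` token filter; both requests should return identical tokens:
+
+```console
+POST _analyze
+{
+  "tokenizer": "lowercase",
+  "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
+}
+
+POST _analyze
+{
+  "tokenizer": "letter",
+  "filter": [ "lowercase" ],
+  "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
+}
+```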
+ diff --git a/docs/reference/data-analysis/text-analysis/analysis-mapping-charfilter.md b/docs/reference/data-analysis/text-analysis/analysis-mapping-charfilter.md new file mode 100644 index 0000000000000..d418ff09ae649 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-mapping-charfilter.md @@ -0,0 +1,121 @@ +--- +navigation_title: "Mapping" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-mapping-charfilter.html +--- + +# Mapping character filter [analysis-mapping-charfilter] + + +The `mapping` character filter accepts a map of keys and values. Whenever it encounters a string of characters that is the same as a key, it replaces them with the value associated with that key. + +Matching is greedy; the longest pattern matching at a given point wins. Replacements are allowed to be the empty string. + +The `mapping` filter uses Lucene’s [MappingCharFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/charfilter/MappingCharFilter.md). + +## Example [analysis-mapping-charfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `mapping` filter to convert Hindu-Arabic numerals (٠‎١٢٣٤٥٦٧٨‎٩‎) into their Arabic-Latin equivalents (0123456789), changing the text `My license plate is ٢٥٠١٥` to `My license plate is 25015`. + +```console +GET /_analyze +{ + "tokenizer": "keyword", + "char_filter": [ + { + "type": "mapping", + "mappings": [ + "٠ => 0", + "١ => 1", + "٢ => 2", + "٣ => 3", + "٤ => 4", + "٥ => 5", + "٦ => 6", + "٧ => 7", + "٨ => 8", + "٩ => 9" + ] + } + ], + "text": "My license plate is ٢٥٠١٥" +} +``` + +The filter produces the following text: + +```text +[ My license plate is 25015 ] +``` + + +## Configurable parameters [analysis-mapping-charfilter-configure-parms] + +`mappings` +: (Required*, array of strings) Array of mappings, with each element having the form `key => value`. + + Either this or the `mappings_path` parameter must be specified. + + +`mappings_path` +: (Required*, string) Path to a file containing `key => value` mappings. + + This path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each mapping in the file must be separated by a line break. + + Either this or the `mappings` parameter must be specified. + + + +## Customize and add to an analyzer [analysis-mapping-charfilter-customize] + +To customize the `mappings` filter, duplicate it to create the basis for a new custom character filter. You can modify the filter using its configurable parameters. + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request configures a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md) using a custom `mappings` filter, `my_mappings_char_filter`. + +The `my_mappings_char_filter` filter replaces the `:)` and `:(` emoticons with a text equivalent. 
+ +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "standard", + "char_filter": [ + "my_mappings_char_filter" + ] + } + }, + "char_filter": { + "my_mappings_char_filter": { + "type": "mapping", + "mappings": [ + ":) => _happy_", + ":( => _sad_" + ] + } + } + } + } +} +``` + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the custom `my_mappings_char_filter` to replace `:(` with `_sad_` in the text `I'm delighted about it :(`. + +```console +GET /my-index-000001/_analyze +{ + "tokenizer": "keyword", + "char_filter": [ "my_mappings_char_filter" ], + "text": "I'm delighted about it :(" +} +``` + +The filter produces the following text: + +```text +[ I'm delighted about it _sad_ ] +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-minhash-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-minhash-tokenfilter.md new file mode 100644 index 0000000000000..91bd40382a091 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-minhash-tokenfilter.md @@ -0,0 +1,121 @@ +--- +navigation_title: "MinHash" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-minhash-tokenfilter.html +--- + +# MinHash token filter [analysis-minhash-tokenfilter] + + +Uses the [MinHash](https://en.wikipedia.org/wiki/MinHash) technique to produce a signature for a token stream. You can use MinHash signatures to estimate the similarity of documents. See [Using the `min_hash` token filter for similarity search](#analysis-minhash-tokenfilter-similarity-search). + +The `min_hash` filter performs the following operations on a token stream in order: + +1. Hashes each token in the stream. +2. Assigns the hashes to buckets, keeping only the smallest hashes of each bucket. +3. Outputs the smallest hash from each bucket as a token stream. + +This filter uses Lucene’s [MinHashFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/minhash/MinHashFilter.md). + +## Configurable parameters [analysis-minhash-tokenfilter-configure-parms] + +`bucket_count` +: (Optional, integer) Number of buckets to which hashes are assigned. Defaults to `512`. + +`hash_count` +: (Optional, integer) Number of ways to hash each token in the stream. Defaults to `1`. + +`hash_set_size` +: (Optional, integer) Number of hashes to keep from each bucket. Defaults to `1`. + + Hashes are retained by ascending size, starting with the bucket’s smallest hash first. + + +`with_rotation` +: (Optional, Boolean) If `true`, the filter fills empty buckets with the value of the first non-empty bucket to its circular right if the `hash_set_size` is `1`. If the `bucket_count` argument is greater than `1`, this parameter defaults to `true`. Otherwise, this parameter defaults to `false`. + + +## Tips for configuring the `min_hash` filter [analysis-minhash-tokenfilter-configuration-tips] + +* `min_hash` filter input tokens should typically be k-words shingles produced from [shingle token filter](/reference/data-analysis/text-analysis/analysis-shingle-tokenfilter.md). You should choose `k` large enough so that the probability of any given shingle occurring in a document is low. At the same time, as internally each shingle is hashed into to 128-bit hash, you should choose `k` small enough so that all possible different k-words shingles can be hashed to 128-bit hash with minimal collision. 
+* We recommend you test different arguments for the `hash_count`, `bucket_count` and `hash_set_size` parameters: + + * To improve precision, increase the `bucket_count` or `hash_set_size` arguments. Higher `bucket_count` and `hash_set_size` values increase the likelihood that different tokens are indexed to different buckets. + * To improve the recall, increase the value of the `hash_count` argument. For example, setting `hash_count` to `2` hashes each token in two different ways, increasing the number of potential candidates for search. + +* By default, the `min_hash` filter produces 512 tokens for each document. Each token is 16 bytes in size. This means each document’s size will be increased by around 8Kb. +* The `min_hash` filter is used for Jaccard similarity. This means that it doesn’t matter how many times a document contains a certain token, only that if it contains it or not. + + +## Using the `min_hash` token filter for similarity search [analysis-minhash-tokenfilter-similarity-search] + +The `min_hash` token filter allows you to hash documents for similarity search. Similarity search, or nearest neighbor search is a complex problem. A naive solution requires an exhaustive pairwise comparison between a query document and every document in an index. This is a prohibitive operation if the index is large. A number of approximate nearest neighbor search solutions have been developed to make similarity search more practical and computationally feasible. One of these solutions involves hashing of documents. + +Documents are hashed in a way that similar documents are more likely to produce the same hash code and are put into the same hash bucket, while dissimilar documents are more likely to be hashed into different hash buckets. This type of hashing is known as locality sensitive hashing (LSH). + +Depending on what constitutes the similarity between documents, various LSH functions [have been proposed](https://arxiv.org/abs/1408.2927). For [Jaccard similarity](https://en.wikipedia.org/wiki/Jaccard_index), a popular LSH function is [MinHash](https://en.wikipedia.org/wiki/MinHash). A general idea of the way MinHash produces a signature for a document is by applying a random permutation over the whole index vocabulary (random numbering for the vocabulary), and recording the minimum value for this permutation for the document (the minimum number for a vocabulary word that is present in the document). The permutations are run several times; combining the minimum values for all of them will constitute a signature for the document. + +In practice, instead of random permutations, a number of hash functions are chosen. A hash function calculates a hash code for each of a document’s tokens and chooses the minimum hash code among them. The minimum hash codes from all hash functions are combined to form a signature for the document. + + +## Customize and add to an analyzer [analysis-minhash-tokenfilter-customize] + +To customize the `min_hash` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the following custom token filters to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md): + +* `my_shingle_filter`, a custom [`shingle` filter](/reference/data-analysis/text-analysis/analysis-shingle-tokenfilter.md). 
`my_shingle_filter` only outputs five-word shingles. +* `my_minhash_filter`, a custom `min_hash` filter. `my_minhash_filter` hashes each five-word shingle once. It then assigns the hashes into 512 buckets, keeping only the smallest hash from each bucket. + +The request also assigns the custom analyzer to the `fingerprint` field mapping. + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "filter": { + "my_shingle_filter": { <1> + "type": "shingle", + "min_shingle_size": 5, + "max_shingle_size": 5, + "output_unigrams": false + }, + "my_minhash_filter": { + "type": "min_hash", + "hash_count": 1, <2> + "bucket_count": 512, <3> + "hash_set_size": 1, <4> + "with_rotation": true <5> + } + }, + "analyzer": { + "my_analyzer": { + "tokenizer": "standard", + "filter": [ + "my_shingle_filter", + "my_minhash_filter" + ] + } + } + } + }, + "mappings": { + "properties": { + "fingerprint": { + "type": "text", + "analyzer": "my_analyzer" + } + } + } +} +``` + +1. Configures a custom shingle filter to output only five-word shingles. +2. Each five-word shingle in the stream is hashed once. +3. The hashes are assigned to 512 buckets. +4. Only the smallest hash in each bucket is retained. +5. The filter fills empty buckets with the values of neighboring buckets. + + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-multiplexer-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-multiplexer-tokenfilter.md new file mode 100644 index 0000000000000..2a4f6c7d39608 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-multiplexer-tokenfilter.md @@ -0,0 +1,119 @@ +--- +navigation_title: "Multiplexer" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-multiplexer-tokenfilter.html +--- + +# Multiplexer token filter [analysis-multiplexer-tokenfilter] + + +A token filter of type `multiplexer` will emit multiple tokens at the same position, each version of the token having been run through a different filter. Identical output tokens at the same position will be removed. + +::::{warning} +If the incoming token stream has duplicate tokens, then these will also be removed by the multiplexer +:::: + + + +## Options [_options] + +filters +: a list of token filters to apply to incoming tokens. These can be any token filters defined elsewhere in the index mappings. Filters can be chained using a comma-delimited string, so for example `"lowercase, porter_stem"` would apply the `lowercase` filter and then the `porter_stem` filter to a single token. 
+ +::::{warning} +[Shingle](/reference/data-analysis/text-analysis/analysis-shingle-tokenfilter.md) or multi-word synonym token filters will not function normally when they are declared in the filters array because they read ahead internally which is unsupported by the multiplexer +:::: + + +preserve_original +: if `true` (the default) then emit the original token in addition to the filtered tokens + + +## Settings example [_settings_example] + +You can set it up like: + +```console +PUT /multiplexer_example +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "standard", + "filter": [ "my_multiplexer" ] + } + }, + "filter": { + "my_multiplexer": { + "type": "multiplexer", + "filters": [ "lowercase", "lowercase, porter_stem" ] + } + } + } + } +} +``` + +And test it like: + +```console +POST /multiplexer_example/_analyze +{ + "analyzer" : "my_analyzer", + "text" : "Going HOME" +} +``` + +And it’d respond: + +```console-result +{ + "tokens": [ + { + "token": "Going", + "start_offset": 0, + "end_offset": 5, + "type": "", + "position": 0 + }, + { + "token": "going", + "start_offset": 0, + "end_offset": 5, + "type": "", + "position": 0 + }, + { + "token": "go", + "start_offset": 0, + "end_offset": 5, + "type": "", + "position": 0 + }, + { + "token": "HOME", + "start_offset": 6, + "end_offset": 10, + "type": "", + "position": 1 + }, + { + "token": "home", <1> + "start_offset": 6, + "end_offset": 10, + "type": "", + "position": 1 + } + ] +} +``` + +1. The stemmer has also emitted a token `home` at position 1, but because it is a duplicate of this token it has been removed from the token stream + + +::::{note} +The synonym and synonym_graph filters use their preceding analysis chain to parse and analyse their synonym lists, and will throw an exception if that chain contains token filters that produce multiple tokens at the same position. If you want to apply synonyms to a token stream containing a multiplexer, then you should append the synonym filter to each relevant multiplexer filter list, rather than placing it after the multiplexer in the main token chain definition. +:::: + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-ngram-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-ngram-tokenfilter.md new file mode 100644 index 0000000000000..c2dff50cf4482 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-ngram-tokenfilter.md @@ -0,0 +1,109 @@ +--- +navigation_title: "N-gram" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-ngram-tokenfilter.html +--- + +# N-gram token filter [analysis-ngram-tokenfilter] + + +Forms [n-grams](https://en.wikipedia.org/wiki/N-gram) of specified lengths from a token. + +For example, you can use the `ngram` token filter to change `fox` to `[ f, fo, o, ox, x ]`. + +This filter uses Lucene’s [NGramTokenFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/ngram/NGramTokenFilter.md). + +::::{note} +The `ngram` filter is similar to the [`edge_ngram` token filter](/reference/data-analysis/text-analysis/analysis-edgengram-tokenfilter.md). However, the `edge_ngram` only outputs n-grams that start at the beginning of a token. 
+ +:::: + + +## Example [analysis-ngram-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `ngram` filter to convert `Quick fox` to 1-character and 2-character n-grams: + +```console +GET _analyze +{ + "tokenizer": "standard", + "filter": [ "ngram" ], + "text": "Quick fox" +} +``` + +The filter produces the following tokens: + +```text +[ Q, Qu, u, ui, i, ic, c, ck, k, f, fo, o, ox, x ] +``` + + +## Add to an analyzer [analysis-ngram-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `ngram` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT ngram_example +{ + "settings": { + "analysis": { + "analyzer": { + "standard_ngram": { + "tokenizer": "standard", + "filter": [ "ngram" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-ngram-tokenfilter-configure-parms] + +`max_gram` +: (Optional, integer) Maximum length of characters in a gram. Defaults to `2`. + +`min_gram` +: (Optional, integer) Minimum length of characters in a gram. Defaults to `1`. + +`preserve_original` +: (Optional, Boolean) Emits original token when set to `true`. Defaults to `false`. + +You can use the [`index.max_ngram_diff`](/reference/elasticsearch/index-settings/index-modules.md#index-max-ngram-diff) index-level setting to control the maximum allowed difference between the `max_gram` and `min_gram` values. + + +## Customize [analysis-ngram-tokenfilter-customize] + +To customize the `ngram` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following request creates a custom `ngram` filter that forms n-grams between 3-5 characters. The request also increases the `index.max_ngram_diff` setting to `2`. + +```console +PUT ngram_custom_example +{ + "settings": { + "index": { + "max_ngram_diff": 2 + }, + "analysis": { + "analyzer": { + "default": { + "tokenizer": "whitespace", + "filter": [ "3_5_grams" ] + } + }, + "filter": { + "3_5_grams": { + "type": "ngram", + "min_gram": 3, + "max_gram": 5 + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-ngram-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-ngram-tokenizer.md new file mode 100644 index 0000000000000..cf0e64d8f9142 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-ngram-tokenizer.md @@ -0,0 +1,109 @@ +--- +navigation_title: "N-gram" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-ngram-tokenizer.html +--- + +# N-gram tokenizer [analysis-ngram-tokenizer] + + +The `ngram` tokenizer first breaks text down into words whenever it encounters one of a list of specified characters, then it emits [N-grams](https://en.wikipedia.org/wiki/N-gram) of each word of the specified length. + +N-grams are like a sliding window that moves across the word - a continuous sequence of characters of the specified length. They are useful for querying languages that don’t use spaces or that have long compound words, like German. 
+ + +## Example output [_example_output_13] + +With the default settings, the `ngram` tokenizer treats the initial text as a single token and produces N-grams with minimum length `1` and maximum length `2`: + +```console +POST _analyze +{ + "tokenizer": "ngram", + "text": "Quick Fox" +} +``` + +The above sentence would produce the following terms: + +```text +[ Q, Qu, u, ui, i, ic, c, ck, k, "k ", " ", " F", F, Fo, o, ox, x ] +``` + + +## Configuration [_configuration_14] + +The `ngram` tokenizer accepts the following parameters: + +`min_gram` +: Minimum length of characters in a gram. Defaults to `1`. + +`max_gram` +: Maximum length of characters in a gram. Defaults to `2`. + +`token_chars` +: Character classes that should be included in a token. Elasticsearch will split on characters that don’t belong to the classes specified. Defaults to `[]` (keep all characters). + + Character classes may be any of the following: + + * `letter` —  for example `a`, `b`, `ï` or `京` + * `digit` —  for example `3` or `7` + * `whitespace` —  for example `" "` or `"\n"` + * `punctuation` — for example `!` or `"` + * `symbol` —  for example `$` or `√` + * `custom` —  custom characters which need to be set using the `custom_token_chars` setting. + + +`custom_token_chars` +: Custom characters that should be treated as part of a token. For example, setting this to `+-_` will make the tokenizer treat the plus, minus and underscore sign as part of a token. + +::::{tip} +It usually makes sense to set `min_gram` and `max_gram` to the same value. The smaller the length, the more documents will match but the lower the quality of the matches. The longer the length, the more specific the matches. A tri-gram (length `3`) is a good place to start. +:::: + + +The index level setting `index.max_ngram_diff` controls the maximum allowed difference between `max_gram` and `min_gram`. + + +## Example configuration [_example_configuration_8] + +In this example, we configure the `ngram` tokenizer to treat letters and digits as tokens, and to produce tri-grams (grams of length `3`): + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "my_tokenizer" + } + }, + "tokenizer": { + "my_tokenizer": { + "type": "ngram", + "min_gram": 3, + "max_gram": 3, + "token_chars": [ + "letter", + "digit" + ] + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_analyzer", + "text": "2 Quick Foxes." +} +``` + +The above example produces the following terms: + +```text +[ Qui, uic, ick, Fox, oxe, xes ] +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-normalization-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-normalization-tokenfilter.md new file mode 100644 index 0000000000000..415a8652bb818 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-normalization-tokenfilter.md @@ -0,0 +1,35 @@ +--- +navigation_title: "Normalization" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-normalization-tokenfilter.html +--- + +# Normalization token filters [analysis-normalization-tokenfilter] + + +There are several token filters available which try to normalize special characters of a certain language. 
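+
+Each of these filters can be used in a `custom` analyzer like any other token filter. As a minimal sketch (added here for illustration; the index and analyzer names are made up), the following request wires the `german_normalization` filter into an analyzer:
+
+```console
+PUT /german_normalization_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "normalized_german": {
+          "tokenizer": "standard",
+          "filter": [ "lowercase", "german_normalization" ]
+        }
+      }
+    }
+  }
+}
+```
+
+The available normalization filters are: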
+ +Arabic +: [`arabic_normalization`](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.html) + +German +: [`german_normalization`](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/de/GermanNormalizationFilter.html) + +Hindi +: [`hindi_normalization`](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/hi/HindiNormalizationFilter.html) + +Indic +: [`indic_normalization`](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/in/IndicNormalizationFilter.html) + +Kurdish (Sorani) +: [`sorani_normalization`](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/ckb/SoraniNormalizationFilter.html) + +Persian +: [`persian_normalization`](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/fa/PersianNormalizationFilter.html) + +Scandinavian +: [`scandinavian_normalization`](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/ScandinavianNormalizer.html), [`scandinavian_folding`](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/ScandinavianFoldingFilter.html) + +Serbian +: [`serbian_normalization`](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/sr/SerbianNormalizationFilter.html) + diff --git a/docs/reference/data-analysis/text-analysis/analysis-pathhierarchy-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-pathhierarchy-tokenizer.md new file mode 100644 index 0000000000000..139d7ea7a613e --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-pathhierarchy-tokenizer.md @@ -0,0 +1,247 @@ +--- +navigation_title: "Path hierarchy" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html +--- + +# Path hierarchy tokenizer [analysis-pathhierarchy-tokenizer] + + +The `path_hierarchy` tokenizer takes a hierarchical value like a filesystem path, splits on the path separator, and emits a term for each component in the tree. The `path_hierarcy` tokenizer uses Lucene’s [PathHierarchyTokenizer](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/path/PathHierarchyTokenizer.md) underneath. + + +## Example output [_example_output_14] + +```console +POST _analyze +{ + "tokenizer": "path_hierarchy", + "text": "/one/two/three" +} +``` + +The above text would produce the following terms: + +```text +[ /one, /one/two, /one/two/three ] +``` + + +## Configuration [_configuration_15] + +The `path_hierarchy` tokenizer accepts the following parameters: + +`delimiter` +: The character to use as the path separator. Defaults to `/`. + +`replacement` +: An optional replacement character to use for the delimiter. Defaults to the `delimiter`. + +`buffer_size` +: The number of characters read into the term buffer in a single pass. Defaults to `1024`. The term buffer will grow by this size until all the text has been consumed. It is advisable not to change this setting. + +`reverse` +: If `true`, uses Lucene’s [ReversePathHierarchyTokenizer](http://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.md), which is suitable for domain–like hierarchies. Defaults to `false`. + +`skip` +: The number of initial tokens to skip. Defaults to `0`. 
+ + +## Example configuration [_example_configuration_9] + +In this example, we configure the `path_hierarchy` tokenizer to split on `-` characters, and to replace them with `/`. The first two tokens are skipped: + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "my_tokenizer" + } + }, + "tokenizer": { + "my_tokenizer": { + "type": "path_hierarchy", + "delimiter": "-", + "replacement": "/", + "skip": 2 + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_analyzer", + "text": "one-two-three-four-five" +} +``` + +The above example produces the following terms: + +```text +[ /three, /three/four, /three/four/five ] +``` + +If we were to set `reverse` to `true`, it would produce the following: + +```text +[ one/two/three/, two/three/, three/ ] +``` + + +## Detailed examples [analysis-pathhierarchy-tokenizer-detailed-examples] + +A common use-case for the `path_hierarchy` tokenizer is filtering results by file paths. If indexing a file path along with the data, the use of the `path_hierarchy` tokenizer to analyze the path allows filtering the results by different parts of the file path string. + +This example configures an index to have two custom analyzers and applies those analyzers to multifields of the `file_path` text field that will store filenames. One of the two analyzers uses reverse tokenization. Some sample documents are then indexed to represent some file paths for photos inside photo folders of two different users. + +```console +PUT file-path-test +{ + "settings": { + "analysis": { + "analyzer": { + "custom_path_tree": { + "tokenizer": "custom_hierarchy" + }, + "custom_path_tree_reversed": { + "tokenizer": "custom_hierarchy_reversed" + } + }, + "tokenizer": { + "custom_hierarchy": { + "type": "path_hierarchy", + "delimiter": "/" + }, + "custom_hierarchy_reversed": { + "type": "path_hierarchy", + "delimiter": "/", + "reverse": "true" + } + } + } + }, + "mappings": { + "properties": { + "file_path": { + "type": "text", + "fields": { + "tree": { + "type": "text", + "analyzer": "custom_path_tree" + }, + "tree_reversed": { + "type": "text", + "analyzer": "custom_path_tree_reversed" + } + } + } + } + } +} + +POST file-path-test/_doc/1 +{ + "file_path": "/User/alice/photos/2017/05/16/my_photo1.jpg" +} + +POST file-path-test/_doc/2 +{ + "file_path": "/User/alice/photos/2017/05/16/my_photo2.jpg" +} + +POST file-path-test/_doc/3 +{ + "file_path": "/User/alice/photos/2017/05/16/my_photo3.jpg" +} + +POST file-path-test/_doc/4 +{ + "file_path": "/User/alice/photos/2017/05/15/my_photo1.jpg" +} + +POST file-path-test/_doc/5 +{ + "file_path": "/User/bob/photos/2017/05/16/my_photo1.jpg" +} +``` + +A search for a particular file path string against the text field matches all the example documents, with Bob’s documents ranking highest due to `bob` also being one of the terms created by the standard analyzer boosting relevance for Bob’s documents. + +```console +GET file-path-test/_search +{ + "query": { + "match": { + "file_path": "/User/bob/photos/2017/05" + } + } +} +``` + +It’s simple to match or filter documents with file paths that exist within a particular directory using the `file_path.tree` field. + +```console +GET file-path-test/_search +{ + "query": { + "term": { + "file_path.tree": "/User/alice/photos/2017/05/16" + } + } +} +``` + +With the reverse parameter for this tokenizer, it’s also possible to match from the other end of the file path, such as individual file names or a deep level subdirectory. 
The following example shows a search for all files named `my_photo1.jpg` within any directory via the `file_path.tree_reversed` field configured to use the reverse parameter in the mapping. + +```console +GET file-path-test/_search +{ + "query": { + "term": { + "file_path.tree_reversed": { + "value": "my_photo1.jpg" + } + } + } +} +``` + +Viewing the tokens generated with both forward and reverse is instructive in showing the tokens created for the same file path value. + +```console +POST file-path-test/_analyze +{ + "analyzer": "custom_path_tree", + "text": "/User/alice/photos/2017/05/16/my_photo1.jpg" +} + +POST file-path-test/_analyze +{ + "analyzer": "custom_path_tree_reversed", + "text": "/User/alice/photos/2017/05/16/my_photo1.jpg" +} +``` + +It’s also useful to be able to filter with file paths when combined with other types of searches, such as this example looking for any files paths with `16` that also must be in Alice’s photo directory. + +```console +GET file-path-test/_search +{ + "query": { + "bool" : { + "must" : { + "match" : { "file_path" : "16" } + }, + "filter": { + "term" : { "file_path.tree" : "/User/alice" } + } + } + } +} +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-pattern-analyzer.md b/docs/reference/data-analysis/text-analysis/analysis-pattern-analyzer.md new file mode 100644 index 0000000000000..ff5c7e550b7b6 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-pattern-analyzer.md @@ -0,0 +1,190 @@ +--- +navigation_title: "Pattern" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html +--- + +# Pattern analyzer [analysis-pattern-analyzer] + + +The `pattern` analyzer uses a regular expression to split the text into terms. The regular expression should match the **token separators** not the tokens themselves. The regular expression defaults to `\W+` (or all non-word characters). + +::::{admonition} Beware of Pathological Regular Expressions +:class: warning + +The pattern analyzer uses [Java Regular Expressions](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md). + +A badly written regular expression could run very slowly or even throw a StackOverflowError and cause the node it is running on to exit suddenly. + +Read more about [pathological regular expressions and how to avoid them](https://www.regular-expressions.info/catastrophic.md). + +:::: + + + +## Example output [_example_output_3] + +```console +POST _analyze +{ + "analyzer": "pattern", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The above sentence would produce the following terms: + +```text +[ the, 2, quick, brown, foxes, jumped, over, the, lazy, dog, s, bone ] +``` + + +## Configuration [_configuration_4] + +The `pattern` analyzer accepts the following parameters: + +`pattern` +: A [Java regular expression](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md), defaults to `\W+`. + +`flags` +: Java regular expression [flags](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md#field.summary). Flags should be pipe-separated, eg `"CASE_INSENSITIVE|COMMENTS"`. + +`lowercase` +: Should terms be lowercased or not. Defaults to `true`. + +`stopwords` +: A pre-defined stop words list like `_english_` or an array containing a list of stop words. Defaults to `_none_`. + +`stopwords_path` +: The path to a file containing stop words. 
+ +See the [Stop Token Filter](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) for more information about stop word configuration. + + +## Example configuration [_example_configuration_3] + +In this example, we configure the `pattern` analyzer to split email addresses on non-word characters or on underscores (`\W|_`), and to lower-case the result: + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_email_analyzer": { + "type": "pattern", + "pattern": "\\W|_", <1> + "lowercase": true + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_email_analyzer", + "text": "John_Smith@foo-bar.com" +} +``` + +1. The backslashes in the pattern need to be escaped when specifying the pattern as a JSON string. + + +The above example produces the following terms: + +```text +[ john, smith, foo, bar, com ] +``` + + +### CamelCase tokenizer [_camelcase_tokenizer] + +The following more complicated example splits CamelCase text into tokens: + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "camel": { + "type": "pattern", + "pattern": "([^\\p{L}\\d]+)|(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)|(?<=[\\p{L}&&[^\\p{Lu}]])(?=\\p{Lu})|(?<=\\p{Lu})(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])" + } + } + } + } +} + +GET my-index-000001/_analyze +{ + "analyzer": "camel", + "text": "MooseX::FTPClass2_beta" +} +``` + +The above example produces the following terms: + +```text +[ moose, x, ftp, class, 2, beta ] +``` + +The regex above is easier to understand as: + +```text + ([^\p{L}\d]+) # swallow non letters and numbers, +| (?<=\D)(?=\d) # or non-number followed by number, +| (?<=\d)(?=\D) # or number followed by non-number, +| (?<=[ \p{L} && [^\p{Lu}]]) # or lower case + (?=\p{Lu}) # followed by upper case, +| (?<=\p{Lu}) # or upper case + (?=\p{Lu} # followed by upper case + [\p{L}&&[^\p{Lu}]] # then lower case + ) +``` + + +## Definition [_definition_3] + +The `pattern` analyzer consists of: + +Tokenizer +: * [Pattern Tokenizer](/reference/data-analysis/text-analysis/analysis-pattern-tokenizer.md) + + +Token Filters +: * [Lower Case Token Filter](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md) +* [Stop Token Filter](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) (disabled by default) + + +If you need to customize the `pattern` analyzer beyond the configuration parameters then you need to recreate it as a `custom` analyzer and modify it, usually by adding token filters. This would recreate the built-in `pattern` analyzer and you can use it as a starting point for further customization: + +```console +PUT /pattern_example +{ + "settings": { + "analysis": { + "tokenizer": { + "split_on_non_word": { + "type": "pattern", + "pattern": "\\W+" <1> + } + }, + "analyzer": { + "rebuilt_pattern": { + "tokenizer": "split_on_non_word", + "filter": [ + "lowercase" <2> + ] + } + } + } + } +} +``` + +1. The default pattern is `\W+` which splits on non-word characters and this is where you’d change it. +2. You’d add other token filters after `lowercase`. 
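+
+To confirm that the rebuilt analyzer matches the built-in one (a quick check added here, not part of the original page), run the example sentence from the top of this page through `rebuilt_pattern`:
+
+```console
+POST /pattern_example/_analyze
+{
+  "analyzer": "rebuilt_pattern",
+  "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
+}
+```
+
+It should return the same terms shown earlier for the built-in `pattern` analyzer.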
+ + diff --git a/docs/reference/data-analysis/text-analysis/analysis-pattern-capture-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-pattern-capture-tokenfilter.md new file mode 100644 index 0000000000000..087e46f3f8fb0 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-pattern-capture-tokenfilter.md @@ -0,0 +1,131 @@ +--- +navigation_title: "Pattern capture" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-capture-tokenfilter.html +--- + +# Pattern capture token filter [analysis-pattern-capture-tokenfilter] + + +The `pattern_capture` token filter, unlike the `pattern` tokenizer, emits a token for every capture group in the regular expression. Patterns are not anchored to the beginning and end of the string, so each pattern can match multiple times, and matches are allowed to overlap. + +::::{admonition} Beware of Pathological Regular Expressions +:class: warning + +The pattern capture token filter uses [Java Regular Expressions](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md). + +A badly written regular expression could run very slowly or even throw a StackOverflowError and cause the node it is running on to exit suddenly. + +Read more about [pathological regular expressions and how to avoid them](https://www.regular-expressions.info/catastrophic.md). + +:::: + + +For instance a pattern like : + +```text +"(([a-z]+)(\d*))" +``` + +when matched against: + +```text +"abc123def456" +``` + +would produce the tokens: [ `abc123`, `abc`, `123`, `def456`, `def`, `456` ] + +If `preserve_original` is set to `true` (the default) then it would also emit the original token: `abc123def456`. + +This is particularly useful for indexing text like camel-case code, eg `stripHTML` where a user may search for `"strip html"` or `"striphtml"`: + +```console +PUT test +{ + "settings" : { + "analysis" : { + "filter" : { + "code" : { + "type" : "pattern_capture", + "preserve_original" : true, + "patterns" : [ + "(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)", + "(\\d+)" + ] + } + }, + "analyzer" : { + "code" : { + "tokenizer" : "pattern", + "filter" : [ "code", "lowercase" ] + } + } + } + } +} +``` + +When used to analyze the text + +```java +import static org.apache.commons.lang.StringEscapeUtils.escapeHtml +``` + +this emits the tokens: [ `import`, `static`, `org`, `apache`, `commons`, `lang`, `stringescapeutils`, `string`, `escape`, `utils`, `escapehtml`, `escape`, `html` ] + +Another example is analyzing email addresses: + +```console +PUT test +{ + "settings" : { + "analysis" : { + "filter" : { + "email" : { + "type" : "pattern_capture", + "preserve_original" : true, + "patterns" : [ + "([^@]+)", + "(\\p{L}+)", + "(\\d+)", + "@(.+)" + ] + } + }, + "analyzer" : { + "email" : { + "tokenizer" : "uax_url_email", + "filter" : [ "email", "lowercase", "unique" ] + } + } + } + } +} +``` + +When the above analyzer is used on an email address like: + +```text +john-smith_123@foo-bar.com +``` + +it would produce the following tokens: + +``` +john-smith_123@foo-bar.com, john-smith_123, +john, smith, 123, foo-bar.com, foo, bar, com +``` +Multiple patterns are required to allow overlapping captures, but also means that patterns are less dense and easier to understand. + +**Note:** All tokens are emitted in the same position, and with the same character offsets. 
This means, for example, that a `match` query for `john-smith_123@foo-bar.com` that uses this analyzer will return documents containing any of these tokens, even when using the `and` operator. Also, when combined with highlighting, the whole original token will be highlighted, not just the matching subset. For instance, querying the above email address for `"smith"` would highlight: + +```html + john-smith_123@foo-bar.com +``` + +not: + +```html + john-smith_123@foo-bar.com +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-pattern-replace-charfilter.md b/docs/reference/data-analysis/text-analysis/analysis-pattern-replace-charfilter.md new file mode 100644 index 0000000000000..8b0e73c56560e --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-pattern-replace-charfilter.md @@ -0,0 +1,197 @@ +--- +navigation_title: "Pattern replace" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-replace-charfilter.html +--- + +# Pattern replace character filter [analysis-pattern-replace-charfilter] + + +The `pattern_replace` character filter uses a regular expression to match characters which should be replaced with the specified replacement string. The replacement string can refer to capture groups in the regular expression. + +::::{admonition} Beware of Pathological Regular Expressions +:class: warning + +The pattern replace character filter uses [Java Regular Expressions](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md). + +A badly written regular expression could run very slowly or even throw a StackOverflowError and cause the node it is running on to exit suddenly. + +Read more about [pathological regular expressions and how to avoid them](https://www.regular-expressions.info/catastrophic.md). + +:::: + + + +## Configuration [_configuration_23] + +The `pattern_replace` character filter accepts the following parameters: + +`pattern` +: A [Java regular expression](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md). Required. + +`replacement` +: The replacement string, which can reference capture groups using the `$1`..`$9` syntax, as explained [here](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Matcher.md#appendReplacement-java.lang.StringBuffer-java.lang.String-). + +`flags` +: Java regular expression [flags](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md#field.summary). Flags should be pipe-separated, eg `"CASE_INSENSITIVE|COMMENTS"`. + + +## Example configuration [_example_configuration_15] + +In this example, we configure the `pattern_replace` character filter to replace any embedded dashes in numbers with underscores, i.e `123-456-789` → `123_456_789`: + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "standard", + "char_filter": [ + "my_char_filter" + ] + } + }, + "char_filter": { + "my_char_filter": { + "type": "pattern_replace", + "pattern": "(\\d+)-(?=\\d)", + "replacement": "$1_" + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_analyzer", + "text": "My credit card is 123-456-789" +} +``` + +The above example produces the following terms: + +```text +[ My, credit, card, is, 123_456_789 ] +``` + +::::{warning} +Using a replacement string that changes the length of the original text will work for search purposes, but will result in incorrect highlighting, as can be seen in the following example. 
+:::: + + +This example inserts a space whenever it encounters a lower-case letter followed by an upper-case letter (i.e. `fooBarBaz` → `foo Bar Baz`), allowing camelCase words to be queried individually: + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "standard", + "char_filter": [ + "my_char_filter" + ], + "filter": [ + "lowercase" + ] + } + }, + "char_filter": { + "my_char_filter": { + "type": "pattern_replace", + "pattern": "(?<=\\p{Lower})(?=\\p{Upper})", + "replacement": " " + } + } + } + }, + "mappings": { + "properties": { + "text": { + "type": "text", + "analyzer": "my_analyzer" + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_analyzer", + "text": "The fooBarBaz method" +} +``` + +The above returns the following terms: + +```text +[ the, foo, bar, baz, method ] +``` + +Querying for `bar` will find the document correctly, but highlighting on the result will produce incorrect highlights, because our character filter changed the length of the original text: + +```console +PUT my-index-000001/_doc/1?refresh +{ + "text": "The fooBarBaz method" +} + +GET my-index-000001/_search +{ + "query": { + "match": { + "text": "bar" + } + }, + "highlight": { + "fields": { + "text": {} + } + } +} +``` + +The output from the above is: + +```console-result +{ + "timed_out": false, + "took": $body.took, + "_shards": { + "total": 1, + "successful": 1, + "skipped" : 0, + "failed": 0 + }, + "hits": { + "total" : { + "value": 1, + "relation": "eq" + }, + "max_score": 0.2876821, + "hits": [ + { + "_index": "my-index-000001", + "_id": "1", + "_score": 0.2876821, + "_source": { + "text": "The fooBarBaz method" + }, + "highlight": { + "text": [ + "The fooBarBaz method" <1> + ] + } + } + ] + } +} +``` + +1. Note the incorrect highlight. + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-pattern-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-pattern-tokenizer.md new file mode 100644 index 0000000000000..86f041dfd01c2 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-pattern-tokenizer.md @@ -0,0 +1,148 @@ +--- +navigation_title: "Pattern" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-tokenizer.html +--- + +# Pattern tokenizer [analysis-pattern-tokenizer] + + +The `pattern` tokenizer uses a regular expression to either split text into terms whenever it matches a word separator, or to capture matching text as terms. + +The default pattern is `\W+`, which splits text whenever it encounters non-word characters. + +::::{admonition} Beware of Pathological Regular Expressions +:class: warning + +The pattern tokenizer uses [Java Regular Expressions](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md). + +A badly written regular expression could run very slowly or even throw a StackOverflowError and cause the node it is running on to exit suddenly. + +Read more about [pathological regular expressions and how to avoid them](https://www.regular-expressions.info/catastrophic.md). + +:::: + + + +## Example output [_example_output_15] + +```console +POST _analyze +{ + "tokenizer": "pattern", + "text": "The foo_bar_size's default is 5." 
+} +``` + +The above sentence would produce the following terms: + +```text +[ The, foo_bar_size, s, default, is, 5 ] +``` + + +## Configuration [_configuration_16] + +The `pattern` tokenizer accepts the following parameters: + +`pattern` +: A [Java regular expression](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md), defaults to `\W+`. + +`flags` +: Java regular expression [flags](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md#field.summary). Flags should be pipe-separated, eg `"CASE_INSENSITIVE|COMMENTS"`. + +`group` +: Which capture group to extract as tokens. Defaults to `-1` (split). + + +## Example configuration [_example_configuration_10] + +In this example, we configure the `pattern` tokenizer to break text into tokens when it encounters commas: + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "my_tokenizer" + } + }, + "tokenizer": { + "my_tokenizer": { + "type": "pattern", + "pattern": "," + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_analyzer", + "text": "comma,separated,values" +} +``` + +The above example produces the following terms: + +```text +[ comma, separated, values ] +``` + +In the next example, we configure the `pattern` tokenizer to capture values enclosed in double quotes (ignoring embedded escaped quotes `\"`). The regex itself looks like this: + +``` +"((?:\\"|[^"]|\\")*)" +``` +And reads as follows: + +* A literal `"` +* Start capturing: + + * A literal `\"` OR any character except `"` + * Repeat until no more characters match + +* A literal closing `"` + +When the pattern is specified in JSON, the `"` and `\` characters need to be escaped, so the pattern ends up looking like: + +``` +\"((?:\\\\\"|[^\"]|\\\\\")+)\" +``` +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "my_tokenizer" + } + }, + "tokenizer": { + "my_tokenizer": { + "type": "pattern", + "pattern": "\"((?:\\\\\"|[^\"]|\\\\\")+)\"", + "group": 1 + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_analyzer", + "text": "\"value\", \"value with embedded \\\" quote\"" +} +``` + +The above example produces the following two terms: + +```text +[ value, value with embedded \" quote ] +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-pattern_replace-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-pattern_replace-tokenfilter.md new file mode 100644 index 0000000000000..f5f2775ddcfb4 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-pattern_replace-tokenfilter.md @@ -0,0 +1,96 @@ +--- +navigation_title: "Pattern replace" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern_replace-tokenfilter.html +--- + +# Pattern replace token filter [analysis-pattern_replace-tokenfilter] + + +Uses a regular expression to match and replace token substrings. + +The `pattern_replace` filter uses [Java’s regular expression syntax](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md). By default, the filter replaces matching substrings with an empty substring (`""`). Replacement substrings can use Java’s [`$g` syntax](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Matcher.md#appendReplacement-java.lang.StringBuffer-java.lang.String-) to reference capture groups from the original token text. 
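+
+As a quick, illustrative sketch of capture group references (the date-like values below are arbitrary and are not taken from this filter's reference examples), the following analyze request swaps two captured groups within each token:
+
+```console
+GET /_analyze
+{
+  "tokenizer": "keyword",
+  "filter": [
+    {
+      "type": "pattern_replace",
+      "pattern": "(\\d{4})-(\\d{2})",
+      "replacement": "$2/$1"
+    }
+  ],
+  "text": "2024-05"
+}
+```
+
+This would produce the single token `05/2024`.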
+ +::::{warning} +A poorly-written regular expression may run slowly or return a StackOverflowError, causing the node running the expression to exit suddenly. + +Read more about [pathological regular expressions and how to avoid them](https://www.regular-expressions.info/catastrophic.md). + +:::: + + +This filter uses Lucene’s [PatternReplaceFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/pattern/PatternReplaceFilter.md). + +## Example [analysis-pattern-replace-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `pattern_replace` filter to prepend `watch` to the substring `dog` in `foxes jump lazy dogs`. + +```console +GET /_analyze +{ + "tokenizer": "whitespace", + "filter": [ + { + "type": "pattern_replace", + "pattern": "(dog)", + "replacement": "watch$1" + } + ], + "text": "foxes jump lazy dogs" +} +``` + +The filter produces the following tokens. + +```text +[ foxes, jump, lazy, watchdogs ] +``` + + +## Configurable parameters [analysis-pattern-replace-tokenfilter-configure-parms] + +`all` +: (Optional, Boolean) If `true`, all substrings matching the `pattern` parameter’s regular expression are replaced. If `false`, the filter replaces only the first matching substring in each token. Defaults to `true`. + +`pattern` +: (Required, string) Regular expression, written in [Java’s regular expression syntax](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.md). The filter replaces token substrings matching this pattern with the substring in the `replacement` parameter. + +`replacement` +: (Optional, string) Replacement substring. Defaults to an empty substring (`""`). + + +## Customize and add to an analyzer [analysis-pattern-replace-tokenfilter-customize] + +To customize the `pattern_replace` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request configures a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md) using a custom `pattern_replace` filter, `my_pattern_replace_filter`. + +The `my_pattern_replace_filter` filter uses the regular expression `[£|€]` to match and remove the currency symbols `£` and `€`. The filter’s `all` parameter is `false`, meaning only the first matching symbol in each token is removed. 
+ +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "keyword", + "filter": [ + "my_pattern_replace_filter" + ] + } + }, + "filter": { + "my_pattern_replace_filter": { + "type": "pattern_replace", + "pattern": "[£|€]", + "replacement": "", + "all": false + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-phonetic-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-phonetic-tokenfilter.md new file mode 100644 index 0000000000000..138a5cb2f7fa4 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-phonetic-tokenfilter.md @@ -0,0 +1,11 @@ +--- +navigation_title: "Phonetic" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-phonetic-tokenfilter.html +--- + +# Phonetic token filter [analysis-phonetic-tokenfilter] + + +The `phonetic` token filter is provided as the [`analysis-phonetic`](/reference/elasticsearch-plugins/analysis-phonetic.md) plugin. + diff --git a/docs/reference/data-analysis/text-analysis/analysis-porterstem-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-porterstem-tokenfilter.md new file mode 100644 index 0000000000000..633cee53e9a37 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-porterstem-tokenfilter.md @@ -0,0 +1,67 @@ +--- +navigation_title: "Porter stem" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-porterstem-tokenfilter.html +--- + +# Porter stem token filter [analysis-porterstem-tokenfilter] + + +Provides [algorithmic stemming](docs-content://manage-data/data-store/text-analysis/stemming.md#algorithmic-stemmers) for the English language, based on the [Porter stemming algorithm](https://snowballstem.org/algorithms/porter/stemmer.md). + +This filter tends to stem more aggressively than other English stemmer filters, such as the [`kstem`](/reference/data-analysis/text-analysis/analysis-kstem-tokenfilter.md) filter. + +The `porter_stem` filter is equivalent to the [`stemmer`](/reference/data-analysis/text-analysis/analysis-stemmer-tokenfilter.md) filter’s [`english`](/reference/data-analysis/text-analysis/analysis-stemmer-tokenfilter.md#analysis-stemmer-tokenfilter-language-parm) variant. + +The `porter_stem` filter uses Lucene’s [PorterStemFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/en/PorterStemFilter.md). + +## Example [analysis-porterstem-tokenfilter-analyze-ex] + +The following analyze API request uses the `porter_stem` filter to stem `the foxes jumping quickly` to `the fox jump quickli`: + +```console +GET /_analyze +{ + "tokenizer": "standard", + "filter": [ "porter_stem" ], + "text": "the foxes jumping quickly" +} +``` + +The filter produces the following tokens: + +```text +[ the, fox, jump, quickli ] +``` + + +## Add to an analyzer [analysis-porterstem-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `porter_stem` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +::::{important} +To work properly, the `porter_stem` filter requires lowercase tokens. To ensure tokens are lowercased, add the [`lowercase`](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md) filter before the `porter_stem` filter in the analyzer configuration. 
+ +:::: + + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "whitespace", + "filter": [ + "lowercase", + "porter_stem" + ] + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-predicatefilter-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-predicatefilter-tokenfilter.md new file mode 100644 index 0000000000000..673a1aa177cf8 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-predicatefilter-tokenfilter.md @@ -0,0 +1,113 @@ +--- +navigation_title: "Predicate script" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-predicatefilter-tokenfilter.html +--- + +# Predicate script token filter [analysis-predicatefilter-tokenfilter] + + +Removes tokens that don’t match a provided predicate script. The filter supports inline [Painless](/reference/scripting-languages/painless/painless.md) scripts only. Scripts are evaluated in the [analysis predicate context](/reference/scripting-languages/painless/painless-analysis-predicate-context.md). + +## Example [analysis-predicatefilter-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `predicate_token_filter` filter to only output tokens longer than three characters from `the fox jumps the lazy dog`. + +```console +GET /_analyze +{ + "tokenizer": "whitespace", + "filter": [ + { + "type": "predicate_token_filter", + "script": { + "source": """ + token.term.length() > 3 + """ + } + } + ], + "text": "the fox jumps the lazy dog" +} +``` + +The filter produces the following tokens. + +```text +[ jumps, lazy ] +``` + +The API response contains the position and offsets of each output token. Note the `predicate_token_filter` filter does not change the tokens' original positions or offsets. + +::::{dropdown} **Response** +```console-result +{ + "tokens" : [ + { + "token" : "jumps", + "start_offset" : 8, + "end_offset" : 13, + "type" : "word", + "position" : 2 + }, + { + "token" : "lazy", + "start_offset" : 18, + "end_offset" : 22, + "type" : "word", + "position" : 4 + } + ] +} +``` + +:::: + + + +## Configurable parameters [analysis-predicatefilter-tokenfilter-configure-parms] + +`script` +: (Required, [script object](docs-content://explore-analyze/scripting/modules-scripting-using.md)) Script containing a condition used to filter incoming tokens. Only tokens that match this script are included in the output. + + This parameter supports inline [Painless](/reference/scripting-languages/painless/painless.md) scripts only. The script is evaluated in the [analysis predicate context](/reference/scripting-languages/painless/painless-analysis-predicate-context.md). + + + +## Customize and add to an analyzer [analysis-predicatefilter-tokenfilter-customize] + +To customize the `predicate_token_filter` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request configures a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md) using a custom `predicate_token_filter` filter, `my_script_filter`. + +The `my_script_filter` filter removes tokens with of any type other than `ALPHANUM`. 
+ +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "standard", + "filter": [ + "my_script_filter" + ] + } + }, + "filter": { + "my_script_filter": { + "type": "predicate_token_filter", + "script": { + "source": """ + token.type.contains("ALPHANUM") + """ + } + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-remove-duplicates-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-remove-duplicates-tokenfilter.md new file mode 100644 index 0000000000000..d61fe1311af1b --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-remove-duplicates-tokenfilter.md @@ -0,0 +1,141 @@ +--- +navigation_title: "Remove duplicates" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-remove-duplicates-tokenfilter.html +--- + +# Remove duplicates token filter [analysis-remove-duplicates-tokenfilter] + + +Removes duplicate tokens in the same position. + +The `remove_duplicates` filter uses Lucene’s [RemoveDuplicatesTokenFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.md). + +## Example [analysis-remove-duplicates-tokenfilter-analyze-ex] + +To see how the `remove_duplicates` filter works, you first need to produce a token stream containing duplicate tokens in the same position. + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the [`keyword_repeat`](/reference/data-analysis/text-analysis/analysis-keyword-repeat-tokenfilter.md) and [`stemmer`](/reference/data-analysis/text-analysis/analysis-stemmer-tokenfilter.md) filters to create stemmed and unstemmed tokens for `jumping dog`. + +```console +GET _analyze +{ + "tokenizer": "whitespace", + "filter": [ + "keyword_repeat", + "stemmer" + ], + "text": "jumping dog" +} +``` + +The API returns the following response. Note that the `dog` token in position `1` is duplicated. + +```console-result +{ + "tokens": [ + { + "token": "jumping", + "start_offset": 0, + "end_offset": 7, + "type": "word", + "position": 0 + }, + { + "token": "jump", + "start_offset": 0, + "end_offset": 7, + "type": "word", + "position": 0 + }, + { + "token": "dog", + "start_offset": 8, + "end_offset": 11, + "type": "word", + "position": 1 + }, + { + "token": "dog", + "start_offset": 8, + "end_offset": 11, + "type": "word", + "position": 1 + } + ] +} +``` + +To remove one of the duplicate `dog` tokens, add the `remove_duplicates` filter to the previous analyze API request. + +```console +GET _analyze +{ + "tokenizer": "whitespace", + "filter": [ + "keyword_repeat", + "stemmer", + "remove_duplicates" + ], + "text": "jumping dog" +} +``` + +The API returns the following response. There is now only one `dog` token in position `1`. 
+ +```console-result +{ + "tokens": [ + { + "token": "jumping", + "start_offset": 0, + "end_offset": 7, + "type": "word", + "position": 0 + }, + { + "token": "jump", + "start_offset": 0, + "end_offset": 7, + "type": "word", + "position": 0 + }, + { + "token": "dog", + "start_offset": 8, + "end_offset": 11, + "type": "word", + "position": 1 + } + ] +} +``` + + +## Add to an analyzer [analysis-remove-duplicates-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `remove_duplicates` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +This custom analyzer uses the `keyword_repeat` and `stemmer` filters to create a stemmed and unstemmed version of each token in a stream. The `remove_duplicates` filter then removes any duplicate tokens in the same position. + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_custom_analyzer": { + "tokenizer": "standard", + "filter": [ + "keyword_repeat", + "stemmer", + "remove_duplicates" + ] + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-reverse-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-reverse-tokenfilter.md new file mode 100644 index 0000000000000..d862aadf115c5 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-reverse-tokenfilter.md @@ -0,0 +1,56 @@ +--- +navigation_title: "Reverse" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-reverse-tokenfilter.html +--- + +# Reverse token filter [analysis-reverse-tokenfilter] + + +Reverses each token in a stream. For example, you can use the `reverse` filter to change `cat` to `tac`. + +Reversed tokens are useful for suffix-based searches, such as finding words that end in `-ion` or searching file names by their extension. + +This filter uses Lucene’s [ReverseStringFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/reverse/ReverseStringFilter.md). + +## Example [analysis-reverse-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `reverse` filter to reverse each token in `quick fox jumps`: + +```console +GET _analyze +{ + "tokenizer" : "standard", + "filter" : ["reverse"], + "text" : "quick fox jumps" +} +``` + +The filter produces the following tokens: + +```text +[ kciuq, xof, spmuj ] +``` + + +## Add to an analyzer [analysis-reverse-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `reverse` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). 
+ +```console +PUT reverse_example +{ + "settings" : { + "analysis" : { + "analyzer" : { + "whitespace_reverse" : { + "tokenizer" : "whitespace", + "filter" : ["reverse"] + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-shingle-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-shingle-tokenfilter.md new file mode 100644 index 0000000000000..e94549f0e2682 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-shingle-tokenfilter.md @@ -0,0 +1,213 @@ +--- +navigation_title: "Shingle" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-shingle-tokenfilter.html +--- + +# Shingle token filter [analysis-shingle-tokenfilter] + + +Add shingles, or word [n-grams](https://en.wikipedia.org/wiki/N-gram), to a token stream by concatenating adjacent tokens. By default, the `shingle` token filter outputs two-word shingles and unigrams. + +For example, many tokenizers convert `the lazy dog` to `[ the, lazy, dog ]`. You can use the `shingle` filter to add two-word shingles to this stream: `[ the, the lazy, lazy, lazy dog, dog ]`. + +::::{tip} +Shingles are often used to help speed up phrase queries, such as [`match_phrase`](/reference/query-languages/query-dsl-match-query-phrase.md). Rather than creating shingles using the `shingles` filter, we recommend you use the [`index-phrases`](/reference/elasticsearch/mapping-reference/index-phrases.md) mapping parameter on the appropriate [text](/reference/elasticsearch/mapping-reference/text.md) field instead. +:::: + + +This filter uses Lucene’s [ShingleFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/shingle/ShingleFilter.md). + +## Example [analysis-shingle-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `shingle` filter to add two-word shingles to the token stream for `quick brown fox jumps`: + +```console +GET /_analyze +{ + "tokenizer": "whitespace", + "filter": [ "shingle" ], + "text": "quick brown fox jumps" +} +``` + +The filter produces the following tokens: + +```text +[ quick, quick brown, brown, brown fox, fox, fox jumps, jumps ] +``` + +To produce shingles of 2-3 words, add the following arguments to the analyze API request: + +* `min_shingle_size`: `2` +* `max_shingle_size`: `3` + +```console +GET /_analyze +{ + "tokenizer": "whitespace", + "filter": [ + { + "type": "shingle", + "min_shingle_size": 2, + "max_shingle_size": 3 + } + ], + "text": "quick brown fox jumps" +} +``` + +The filter produces the following tokens: + +```text +[ quick, quick brown, quick brown fox, brown, brown fox, brown fox jumps, fox, fox jumps, jumps ] +``` + +To only include shingles in the output, add an `output_unigrams` argument of `false` to the request. 
+ +```console +GET /_analyze +{ + "tokenizer": "whitespace", + "filter": [ + { + "type": "shingle", + "min_shingle_size": 2, + "max_shingle_size": 3, + "output_unigrams": false + } + ], + "text": "quick brown fox jumps" +} +``` + +The filter produces the following tokens: + +```text +[ quick brown, quick brown fox, brown fox, brown fox jumps, fox jumps ] +``` + + +## Add to an analyzer [analysis-shingle-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `shingle` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "standard_shingle": { + "tokenizer": "standard", + "filter": [ "shingle" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-shingle-tokenfilter-configure-parms] + +`max_shingle_size` +: (Optional, integer) Maximum number of tokens to concatenate when creating shingles. Defaults to `2`. + + ::::{note} + This value cannot be lower than the `min_shingle_size` argument, which defaults to `2`. The difference between this value and the `min_shingle_size` argument cannot exceed the [`index.max_shingle_diff`](/reference/elasticsearch/index-settings/index-modules.md#index-max-shingle-diff) index-level setting, which defaults to `3`. + :::: + + +`min_shingle_size` +: (Optional, integer) Minimum number of tokens to concatenate when creating shingles. Defaults to `2`. + + ::::{note} + This value cannot exceed the `max_shingle_size` argument, which defaults to `2`. The difference between the `max_shingle_size` argument and this value cannot exceed the [`index.max_shingle_diff`](/reference/elasticsearch/index-settings/index-modules.md#index-max-shingle-diff) index-level setting, which defaults to `3`. + :::: + + +`output_unigrams` +: (Optional, Boolean) If `true`, the output includes the original input tokens. If `false`, the output only includes shingles; the original input tokens are removed. Defaults to `true`. + +`output_unigrams_if_no_shingles` +: If `true`, the output includes the original input tokens only if no shingles are produced; if shingles are produced, the output only includes shingles. Defaults to `false`. + + ::::{important} + If both this and the `output_unigrams` parameter are `true`, only the `output_unigrams` argument is used. + :::: + + +`token_separator` +: (Optional, string) Separator used to concatenate adjacent tokens to form a shingle. Defaults to a space (`" "`). + +`filler_token` +: (Optional, string) String used in shingles as a replacement for empty positions that do not contain a token. This filler token is only used in shingles, not original unigrams. Defaults to an underscore (`_`). + +Some token filters, such as the `stop` filter, create empty positions when removing stop words with a position increment greater than one. + +::::{dropdown} **Example** +In the following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request, the `stop` filter removes the stop word `a` from `fox jumps a lazy dog`, creating an empty position. The subsequent `shingle` filter replaces this empty position with a plus sign (`+`) in shingles. 
+ +```console +GET /_analyze +{ + "tokenizer": "whitespace", + "filter": [ + { + "type": "stop", + "stopwords": [ "a" ] + }, + { + "type": "shingle", + "filler_token": "+" + } + ], + "text": "fox jumps a lazy dog" +} +``` + +The filter produces the following tokens: + +```text +[ fox, fox jumps, jumps, jumps +, + lazy, lazy, lazy dog, dog ] +``` + +:::: + + + + +## Customize [analysis-shingle-tokenfilter-customize] + +To customize the `shingle` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses a custom `shingle` filter, `my_shingle_filter`, to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +The `my_shingle_filter` filter uses a `min_shingle_size` of `2` and a `max_shingle_size` of `5`, meaning it produces shingles of 2-5 words. The filter also includes a `output_unigrams` argument of `false`, meaning that only shingles are included in the output. + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "en": { + "tokenizer": "standard", + "filter": [ "my_shingle_filter" ] + } + }, + "filter": { + "my_shingle_filter": { + "type": "shingle", + "min_shingle_size": 2, + "max_shingle_size": 5, + "output_unigrams": false + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-simple-analyzer.md b/docs/reference/data-analysis/text-analysis/analysis-simple-analyzer.md new file mode 100644 index 0000000000000..e5734df6cc45d --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-simple-analyzer.md @@ -0,0 +1,62 @@ +--- +navigation_title: "Simple" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-simple-analyzer.html +--- + +# Simple analyzer [analysis-simple-analyzer] + + +The `simple` analyzer breaks text into tokens at any non-letter character, such as numbers, spaces, hyphens and apostrophes, discards non-letter characters, and changes uppercase to lowercase. + +## Example [analysis-simple-analyzer-ex] + +```console +POST _analyze +{ + "analyzer": "simple", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The `simple` analyzer parses the sentence and produces the following tokens: + +```text +[ the, quick, brown, foxes, jumped, over, the, lazy, dog, s, bone ] +``` + + +## Definition [analysis-simple-analyzer-definition] + +The `simple` analyzer is defined by one tokenizer: + +Tokenizer +: * [Lowercase Tokenizer](/reference/data-analysis/text-analysis/analysis-lowercase-tokenizer.md) + + + +## Customize [analysis-simple-analyzer-customize] + +To customize the `simple` analyzer, duplicate it to create the basis for a custom analyzer. This custom analyzer can be modified as required, usually by adding token filters. + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_custom_simple_analyzer": { + "tokenizer": "lowercase", + "filter": [ <1> + ] + } + } + } + } +} +``` + +1. Add token filters here. 
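+
+For example, a minimal sketch of such a customization (the `my-index-000002` index name is illustrative) adds the built-in `asciifolding` filter so that accented characters are folded to their ASCII equivalents:
+
+```console
+PUT /my-index-000002
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "my_custom_simple_analyzer": {
+          "tokenizer": "lowercase",
+          "filter": [
+            "asciifolding" <1>
+          ]
+        }
+      }
+    }
+  }
+}
+```
+
+1. `asciifolding` is only one possible choice; any token filter could be added here.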
+ + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-simplepattern-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-simplepattern-tokenizer.md new file mode 100644 index 0000000000000..67d0f7e991f04 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-simplepattern-tokenizer.md @@ -0,0 +1,63 @@ +--- +navigation_title: "Simple pattern" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-simplepattern-tokenizer.html +--- + +# Simple pattern tokenizer [analysis-simplepattern-tokenizer] + + +The `simple_pattern` tokenizer uses a regular expression to capture matching text as terms. The set of regular expression features it supports is more limited than the [`pattern`](/reference/data-analysis/text-analysis/analysis-pattern-tokenizer.md) tokenizer, but the tokenization is generally faster. + +This tokenizer does not support splitting the input on a pattern match, unlike the [`pattern`](/reference/data-analysis/text-analysis/analysis-pattern-tokenizer.md) tokenizer. To split on pattern matches using the same restricted regular expression subset, see the [`simple_pattern_split`](/reference/data-analysis/text-analysis/analysis-simplepatternsplit-tokenizer.md) tokenizer. + +This tokenizer uses [Lucene regular expressions](https://lucene.apache.org/core/10_0_0/core/org/apache/lucene/util/automaton/RegExp.md). For an explanation of the supported features and syntax, see [Regular Expression Syntax](/reference/query-languages/regexp-syntax.md). + +The default pattern is the empty string, which produces no terms. This tokenizer should always be configured with a non-default pattern. + + +## Configuration [_configuration_17] + +The `simple_pattern` tokenizer accepts the following parameters: + +`pattern` +: [Lucene regular expression](https://lucene.apache.org/core/10_0_0/core/org/apache/lucene/util/automaton/RegExp.md), defaults to the empty string. + + +## Example configuration [_example_configuration_11] + +This example configures the `simple_pattern` tokenizer to produce terms that are three-digit numbers + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "my_tokenizer" + } + }, + "tokenizer": { + "my_tokenizer": { + "type": "simple_pattern", + "pattern": "[0123456789]{3}" + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_analyzer", + "text": "fd-786-335-514-x" +} +``` + +The above example produces these terms: + +```text +[ 786, 335, 514 ] +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-simplepatternsplit-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-simplepatternsplit-tokenizer.md new file mode 100644 index 0000000000000..bde66940647ca --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-simplepatternsplit-tokenizer.md @@ -0,0 +1,63 @@ +--- +navigation_title: "Simple pattern split" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-simplepatternsplit-tokenizer.html +--- + +# Simple pattern split tokenizer [analysis-simplepatternsplit-tokenizer] + + +The `simple_pattern_split` tokenizer uses a regular expression to split the input into terms at pattern matches. The set of regular expression features it supports is more limited than the [`pattern`](/reference/data-analysis/text-analysis/analysis-pattern-tokenizer.md) tokenizer, but the tokenization is generally faster. 
+ +This tokenizer does not produce terms from the matches themselves. To produce terms from matches using patterns in the same restricted regular expression subset, see the [`simple_pattern`](/reference/data-analysis/text-analysis/analysis-simplepattern-tokenizer.md) tokenizer. + +This tokenizer uses [Lucene regular expressions](https://lucene.apache.org/core/10_0_0/core/org/apache/lucene/util/automaton/RegExp.md). For an explanation of the supported features and syntax, see [Regular Expression Syntax](/reference/query-languages/regexp-syntax.md). + +The default pattern is the empty string, which produces one term containing the full input. This tokenizer should always be configured with a non-default pattern. + + +## Configuration [_configuration_18] + +The `simple_pattern_split` tokenizer accepts the following parameters: + +`pattern` +: A [Lucene regular expression](https://lucene.apache.org/core/10_0_0/core/org/apache/lucene/util/automaton/RegExp.md), defaults to the empty string. + + +## Example configuration [_example_configuration_12] + +This example configures the `simple_pattern_split` tokenizer to split the input text on underscores. + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "my_tokenizer" + } + }, + "tokenizer": { + "my_tokenizer": { + "type": "simple_pattern_split", + "pattern": "_" + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_analyzer", + "text": "an_underscored_phrase" +} +``` + +The above example produces these terms: + +```text +[ an, underscored, phrase ] +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-snowball-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-snowball-tokenfilter.md new file mode 100644 index 0000000000000..3d942fcefe200 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-snowball-tokenfilter.md @@ -0,0 +1,41 @@ +--- +navigation_title: "Snowball" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-snowball-tokenfilter.html +applies_to: + stack: all +--- + +# Snowball token filter [analysis-snowball-tokenfilter] + + +A filter that stems words using a Snowball-generated stemmer. The `language` parameter controls the stemmer with the following available values: `Arabic`, `Armenian`, `Basque`, `Catalan`, `Danish`, `Dutch`, `English`, `Estonian`, `Finnish`, `French`, `German`, `German2`, `Hungarian`, `Italian`, `Irish`, `Kp`, `Lithuanian`, `Lovins`, `Norwegian`, `Porter`, `Portuguese`, `Romanian`, `Russian`, `Serbian`, `Spanish`, `Swedish`, `Turkish`. + +:::{note} +The `Kp` and `Lovins` values are deprecated in 8.16.0 and will be removed in a future version. 
+::: + +For example: + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "standard", + "filter": [ "lowercase", "my_snow" ] + } + }, + "filter": { + "my_snow": { + "type": "snowball", + "language": "English" + } + } + } + } +} +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-standard-analyzer.md b/docs/reference/data-analysis/text-analysis/analysis-standard-analyzer.md new file mode 100644 index 0000000000000..1c648d55998e7 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-standard-analyzer.md @@ -0,0 +1,115 @@ +--- +navigation_title: "Standard" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html +--- + +# Standard analyzer [analysis-standard-analyzer] + + +The `standard` analyzer is the default analyzer which is used if none is specified. It provides grammar based tokenization (based on the Unicode Text Segmentation algorithm, as specified in [Unicode Standard Annex #29](https://unicode.org/reports/tr29/)) and works well for most languages. + + +## Example output [_example_output_4] + +```console +POST _analyze +{ + "analyzer": "standard", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The above sentence would produce the following terms: + +```text +[ the, 2, quick, brown, foxes, jumped, over, the, lazy, dog's, bone ] +``` + + +## Configuration [_configuration_5] + +The `standard` analyzer accepts the following parameters: + +`max_token_length` +: The maximum token length. If a token is seen that exceeds this length then it is split at `max_token_length` intervals. Defaults to `255`. + +`stopwords` +: A pre-defined stop words list like `_english_` or an array containing a list of stop words. Defaults to `_none_`. + +`stopwords_path` +: The path to a file containing stop words. + +See the [Stop Token Filter](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) for more information about stop word configuration. + + +## Example configuration [_example_configuration_4] + +In this example, we configure the `standard` analyzer to have a `max_token_length` of 5 (for demonstration purposes), and to use the pre-defined list of English stop words: + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_english_analyzer": { + "type": "standard", + "max_token_length": 5, + "stopwords": "_english_" + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_english_analyzer", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The above example produces the following terms: + +```text +[ 2, quick, brown, foxes, jumpe, d, over, lazy, dog's, bone ] +``` + + +## Definition [_definition_4] + +The `standard` analyzer consists of: + +Tokenizer +: * [Standard Tokenizer](/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md) + + +Token Filters +: * [Lower Case Token Filter](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md) +* [Stop Token Filter](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) (disabled by default) + + +If you need to customize the `standard` analyzer beyond the configuration parameters then you need to recreate it as a `custom` analyzer and modify it, usually by adding token filters. 
This would recreate the built-in `standard` analyzer and you can use it as a starting point: + +```console +PUT /standard_example +{ + "settings": { + "analysis": { + "analyzer": { + "rebuilt_standard": { + "tokenizer": "standard", + "filter": [ + "lowercase" <1> + ] + } + } + } + } +} +``` + +1. You’d add any token filters after `lowercase`. + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md new file mode 100644 index 0000000000000..eaab13db4c2aa --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md @@ -0,0 +1,74 @@ +--- +navigation_title: "Standard" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-tokenizer.html +--- + +# Standard tokenizer [analysis-standard-tokenizer] + + +The `standard` tokenizer provides grammar based tokenization (based on the Unicode Text Segmentation algorithm, as specified in [Unicode Standard Annex #29](https://unicode.org/reports/tr29/)) and works well for most languages. + + +## Example output [_example_output_16] + +```console +POST _analyze +{ + "tokenizer": "standard", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The above sentence would produce the following terms: + +```text +[ The, 2, QUICK, Brown, Foxes, jumped, over, the, lazy, dog's, bone ] +``` + + +## Configuration [_configuration_19] + +The `standard` tokenizer accepts the following parameters: + +`max_token_length` +: The maximum token length. If a token is seen that exceeds this length then it is split at `max_token_length` intervals. Defaults to `255`. + + +## Example configuration [_example_configuration_13] + +In this example, we configure the `standard` tokenizer to have a `max_token_length` of 5 (for demonstration purposes): + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "my_tokenizer" + } + }, + "tokenizer": { + "my_tokenizer": { + "type": "standard", + "max_token_length": 5 + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_analyzer", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The above example produces the following terms: + +```text +[ The, 2, QUICK, Brown, Foxes, jumpe, d, over, the, lazy, dog's, bone ] +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-stemmer-override-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-stemmer-override-tokenfilter.md new file mode 100644 index 0000000000000..9444fb747e280 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-stemmer-override-tokenfilter.md @@ -0,0 +1,77 @@ +--- +navigation_title: "Stemmer override" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-stemmer-override-tokenfilter.html +--- + +# Stemmer override token filter [analysis-stemmer-override-tokenfilter] + + +Overrides stemming algorithms, by applying a custom mapping, then protecting these terms from being modified by stemmers. Must be placed before any stemming filters. + +Rules are mappings in the form of `token1[, ..., tokenN] => override`. + +| Setting | Description | +| --- | --- | +| `rules` | A list of mapping rules to use. | +| `rules_path` | A path (either relative to `config` location, orabsolute) to a list of mappings. 
| + +Here is an example: + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "standard", + "filter": [ "lowercase", "custom_stems", "porter_stem" ] + } + }, + "filter": { + "custom_stems": { + "type": "stemmer_override", + "rules_path": "analysis/stemmer_override.txt" + } + } + } + } +} +``` + +Where the file looks like: + +```text +running, runs => run + +stemmer => stemmer +``` + +You can also define the overrides rules inline: + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "standard", + "filter": [ "lowercase", "custom_stems", "porter_stem" ] + } + }, + "filter": { + "custom_stems": { + "type": "stemmer_override", + "rules": [ + "running, runs => run", + "stemmer => stemmer" + ] + } + } + } + } +} +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-stemmer-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-stemmer-tokenfilter.md new file mode 100644 index 0000000000000..bce8a37f5eba1 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-stemmer-tokenfilter.md @@ -0,0 +1,135 @@ +--- +navigation_title: "Stemmer" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-stemmer-tokenfilter.html +--- + +# Stemmer token filter [analysis-stemmer-tokenfilter] + + +Provides [algorithmic stemming](docs-content://manage-data/data-store/text-analysis/stemming.md#algorithmic-stemmers) for several languages, some with additional variants. For a list of supported languages, see the [`language`](#analysis-stemmer-tokenfilter-language-parm) parameter. + +When not customized, the filter uses the [porter stemming algorithm](https://snowballstem.org/algorithms/porter/stemmer.md) for English. + +## Example [analysis-stemmer-tokenfilter-analyze-ex] + +The following analyze API request uses the `stemmer` filter’s default porter stemming algorithm to stem `the foxes jumping quickly` to `the fox jump quickli`: + +```console +GET /_analyze +{ + "tokenizer": "standard", + "filter": [ "stemmer" ], + "text": "the foxes jumping quickly" +} +``` + +The filter produces the following tokens: + +```text +[ the, fox, jump, quickli ] +``` + + +## Add to an analyzer [analysis-stemmer-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `stemmer` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "whitespace", + "filter": [ "stemmer" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-stemmer-tokenfilter-configure-parms] + +$$$analysis-stemmer-tokenfilter-language-parm$$$ + +`language`: (Optional, string) Language-dependent stemming algorithm used to stem tokens. If both this and the `name` parameter are specified, the `language` parameter argument is used. + +:::{dropdown} Valid values for `language` + +Valid values are sorted by language. Defaults to [**`english`**](https://snowballstem.org/algorithms/porter/stemmer.md). Recommended algorithms are **bolded**. 
+Arabic: [**`arabic`**](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/ar/ArabicStemmer.md) +Armenian: [**`armenian`**](https://snowballstem.org/algorithms/armenian/stemmer.md) +Basque: [**`basque`**](https://snowballstem.org/algorithms/basque/stemmer.md) +Bengali:[**`bengali`**](https://www.tandfonline.com/doi/abs/10.1080/02564602.1993.11437284) +Brazilian Portuguese:[**`brazilian`**](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/br/BrazilianStemmer.md) +Bulgarian:[**`bulgarian`**](http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf) +Catalan:[**`catalan`**](https://snowballstem.org/algorithms/catalan/stemmer.md) +Czech:[**`czech`**](https://dl.acm.org/doi/10.1016/j.ipm.2009.06.001) +Danish:[**`danish`**](https://snowballstem.org/algorithms/danish/stemmer.md) +Dutch:[**`dutch`**](https://snowballstem.org/algorithms/dutch/stemmer.md), [`dutch_kp`](https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.md) [8.16.0] +English:[**`english`**](https://snowballstem.org/algorithms/porter/stemmer.md), [`light_english`](https://ciir.cs.umass.edu/pubfiles/ir-35.pdf), [`lovins`](https://snowballstem.org/algorithms/lovins/stemmer.md) [8.16.0], [`minimal_english`](https://www.researchgate.net/publication/220433848_How_effective_is_suffixing), [`porter2`](https://snowballstem.org/algorithms/english/stemmer.md), [`possessive_english`](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/en/EnglishPossessiveFilter.md) +Estonian:[**`estonian`**](https://lucene.apache.org/core/10_0_0/analyzers-common/org/tartarus/snowball/ext/EstonianStemmer.md) +Finnish:[**`finnish`**](https://snowballstem.org/algorithms/finnish/stemmer.md), [`light_finnish`](http://clef.isti.cnr.it/2003/WN_web/22.pdf) +French:[**`light_french`**](https://dl.acm.org/citation.cfm?id=1141523), [`french`](https://snowballstem.org/algorithms/french/stemmer.md), [`minimal_french`](https://dl.acm.org/citation.cfm?id=318984) +Galician:[**`galician`**](http://bvg.udc.es/recursos_lingua/stemming.jsp), [`minimal_galician`](http://bvg.udc.es/recursos_lingua/stemming.jsp) (Plural step only) +German:[**`light_german`**](https://dl.acm.org/citation.cfm?id=1141523), [`german`](https://snowballstem.org/algorithms/german/stemmer.md), [`minimal_german`](http://members.unine.ch/jacques.savoy/clef/morpho.pdf) +Greek:[**`greek`**](https://sais.se/mthprize/2007/ntais2007.pdf) +Hindi:[**`hindi`**](http://computing.open.ac.uk/Sites/EACLSouthAsia/Papers/p6-Ramanathan.pdf) +Hungarian:[**`hungarian`**](https://snowballstem.org/algorithms/hungarian/stemmer.md), [`light_hungarian`](https://dl.acm.org/citation.cfm?id=1141523&dl=ACM&coll=DL&CFID=179095584&CFTOKEN=80067181) +Indonesian:[**`indonesian`**](http://www.illc.uva.nl/Publications/ResearchReports/MoL-2003-02.text.pdf) +Irish:[**`irish`**](https://snowballstem.org/otherapps/oregan/) +Italian:[**`light_italian`**](https://www.ercim.eu/publication/ws-proceedings/CLEF2/savoy.pdf), [`italian`](https://snowballstem.org/algorithms/italian/stemmer.md) +Kurdish (Sorani):[**`sorani`**](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/ckb/SoraniStemmer.md) +Latvian:[**`latvian`**](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/lv/LatvianStemmer.md) +Lithuanian:[**`lithuanian`**](https://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_5_3/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/stem_ISO_8859_1.sbl?view=markup) +Norwegian 
(Bokmål):[**`norwegian`**](https://snowballstem.org/algorithms/norwegian/stemmer.md), [**`light_norwegian`**](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/no/NorwegianLightStemmer.md), [`minimal_norwegian`](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/no/NorwegianMinimalStemmer.md) +Norwegian:(Nynorsk)[**`light_nynorsk`**](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/no/NorwegianLightStemmer.md), [`minimal_nynorsk`](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/no/NorwegianMinimalStemmer.md) +Persian:[**`persian`**](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/fa/PersianStemmer.md) +Portuguese:[**`light_portuguese`**](https://dl.acm.org/citation.cfm?id=1141523&dl=ACM&coll=DL&CFID=179095584&CFTOKEN=80067181), [`minimal_portuguese`](http://www.inf.ufrgs.br/~buriol/papers/Orengo_CLEF07.pdf), [`portuguese`](https://snowballstem.org/algorithms/portuguese/stemmer.md), [`portuguese_rslp`](https://www.inf.ufrgs.br/\~viviane/rslp/index.htm) +Romanian:[**`romanian`**](https://snowballstem.org/algorithms/romanian/stemmer.md) +Russian:[**`russian`**](https://snowballstem.org/algorithms/russian/stemmer.md), [`light_russian`](https://doc.rero.ch/lm.php?url=1000%2C43%2C4%2C20091209094227-CA%2FDolamic_Ljiljana_-_Indexing_and_Searching_Strategies_for_the_Russian_20091209.pdf) +Serbian:[**`serbian`**](https://snowballstem.org/algorithms/serbian/stemmer.md) +Spanish:[**`light_spanish`**](https://www.ercim.eu/publication/ws-proceedings/CLEF2/savoy.pdf), [`spanish`](https://snowballstem.org/algorithms/spanish/stemmer.md) [`spanish_plural`](https://www.wikilengua.org/index.php/Plural_(formaci%C3%B3n)) +Swedish:[**`swedish`**](https://snowballstem.org/algorithms/swedish/stemmer.md), [`light_swedish`](http://clef.isti.cnr.it/2003/WN_web/22.pdf) +Turkish:[**`turkish`**](https://snowballstem.org/algorithms/turkish/stemmer.md) +::: + +`name`: An alias for the [`language`](#analysis-stemmer-tokenfilter-language-parm) parameter. If both this and the `language` parameter are specified, the `language` parameter argument is used. + + +## Customize [analysis-stemmer-tokenfilter-customize] + +To customize the `stemmer` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following request creates a custom `stemmer` filter that stems words using the `light_german` algorithm: + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "my_stemmer" + ] + } + }, + "filter": { + "my_stemmer": { + "type": "stemmer", + "language": "light_german" + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-stop-analyzer.md b/docs/reference/data-analysis/text-analysis/analysis-stop-analyzer.md new file mode 100644 index 0000000000000..bf250da6e831c --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-stop-analyzer.md @@ -0,0 +1,117 @@ +--- +navigation_title: "Stop" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-stop-analyzer.html +--- + +# Stop analyzer [analysis-stop-analyzer] + + +The `stop` analyzer is the same as the [`simple` analyzer](/reference/data-analysis/text-analysis/analysis-simple-analyzer.md) but adds support for removing stop words. 
It defaults to using the `_english_` stop words. + + +## Example output [_example_output_5] + +```console +POST _analyze +{ + "analyzer": "stop", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The above sentence would produce the following terms: + +```text +[ quick, brown, foxes, jumped, over, lazy, dog, s, bone ] +``` + + +## Configuration [_configuration_6] + +The `stop` analyzer accepts the following parameters: + +`stopwords` +: A pre-defined stop words list like `_english_` or an array containing a list of stop words. Defaults to `_english_`. + +`stopwords_path` +: The path to a file containing stop words. This path is relative to the Elasticsearch `config` directory. + +See the [Stop Token Filter](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) for more information about stop word configuration. + + +## Example configuration [_example_configuration_5] + +In this example, we configure the `stop` analyzer to use a specified list of words as stop words: + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_stop_analyzer": { + "type": "stop", + "stopwords": ["the", "over"] + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_stop_analyzer", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The above example produces the following terms: + +```text +[ quick, brown, foxes, jumped, lazy, dog, s, bone ] +``` + + +## Definition [_definition_5] + +It consists of: + +Tokenizer +: * [Lower Case Tokenizer](/reference/data-analysis/text-analysis/analysis-lowercase-tokenizer.md) + + +Token filters +: * [Stop Token Filter](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) + + +If you need to customize the `stop` analyzer beyond the configuration parameters then you need to recreate it as a `custom` analyzer and modify it, usually by adding token filters. This would recreate the built-in `stop` analyzer and you can use it as a starting point for further customization: + +```console +PUT /stop_example +{ + "settings": { + "analysis": { + "filter": { + "english_stop": { + "type": "stop", + "stopwords": "_english_" <1> + } + }, + "analyzer": { + "rebuilt_stop": { + "tokenizer": "lowercase", + "filter": [ + "english_stop" <2> + ] + } + } + } + } +} +``` + +1. The default stopwords can be overridden with the `stopwords` or `stopwords_path` parameters. +2. You’d add any token filters after `english_stop`. + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md new file mode 100644 index 0000000000000..babae63592cfa --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md @@ -0,0 +1,325 @@ +--- +navigation_title: "Stop" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-stop-tokenfilter.html +--- + +# Stop token filter [analysis-stop-tokenfilter] + + +Removes [stop words](https://en.wikipedia.org/wiki/Stop_words) from a token stream. 
+ +When not customized, the filter removes the following English stop words by default: + +`a`, `an`, `and`, `are`, `as`, `at`, `be`, `but`, `by`, `for`, `if`, `in`, `into`, `is`, `it`, `no`, `not`, `of`, `on`, `or`, `such`, `that`, `the`, `their`, `then`, `there`, `these`, `they`, `this`, `to`, `was`, `will`, `with` + +In addition to English, the `stop` filter supports predefined [stop word lists for several languages](#analysis-stop-tokenfilter-stop-words-by-lang). You can also specify your own stop words as an array or file. + +The `stop` filter uses Lucene’s [StopFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/core/StopFilter.md). + +## Example [analysis-stop-tokenfilter-analyze-ex] + +The following analyze API request uses the `stop` filter to remove the stop words `a` and `the` from `a quick fox jumps over the lazy dog`: + +```console +GET /_analyze +{ + "tokenizer": "standard", + "filter": [ "stop" ], + "text": "a quick fox jumps over the lazy dog" +} +``` + +The filter produces the following tokens: + +```text +[ quick, fox, jumps, over, lazy, dog ] +``` + + +## Add to an analyzer [analysis-stop-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `stop` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "whitespace", + "filter": [ "stop" ] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-stop-tokenfilter-configure-parms] + +`stopwords` +: (Optional, string or array of strings) Language value, such as `_arabic_` or `_thai_`. Defaults to [`_english_`](#english-stop-words). + +Each language value corresponds to a predefined list of stop words in Lucene. See [Stop words by language](#analysis-stop-tokenfilter-stop-words-by-lang) for supported language values and their stop words. + +Also accepts an array of stop words. + +For an empty list of stop words, use `_none_`. + + +`stopwords_path` +: (Optional, string) Path to a file that contains a list of stop words to remove. + +This path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each stop word in the file must be separated by a line break. + + +`ignore_case` +: (Optional, Boolean) If `true`, stop word matching is case insensitive. For example, if `true`, a stop word of `the` matches and removes `The`, `THE`, or `the`. Defaults to `false`. + +`remove_trailing` +: (Optional, Boolean) If `true`, the last token of a stream is removed if it’s a stop word. Defaults to `true`. + +This parameter should be `false` when using the filter with a completion suggester. This would ensure a query like `green a` matches and suggests `green apple` while still removing other stop words. For more information about completion suggesters, refer to [](/reference/elasticsearch/rest-apis/search-suggesters.md) + +## Customize [analysis-stop-tokenfilter-customize] + +To customize the `stop` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. 
+ +For example, the following request creates a custom case-insensitive `stop` filter that removes stop words from the [`_english_`](#english-stop-words) stop words list: + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "default": { + "tokenizer": "whitespace", + "filter": [ "my_custom_stop_words_filter" ] + } + }, + "filter": { + "my_custom_stop_words_filter": { + "type": "stop", + "ignore_case": true + } + } + } + } +} +``` + +You can also specify your own list of stop words. For example, the following request creates a custom case-insensitive `stop` filter that removes only the stop words `and`, `is`, and `the`: + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "default": { + "tokenizer": "whitespace", + "filter": [ "my_custom_stop_words_filter" ] + } + }, + "filter": { + "my_custom_stop_words_filter": { + "type": "stop", + "ignore_case": true, + "stopwords": [ "and", "is", "the" ] + } + } + } + } +} +``` + + +## Stop words by language [analysis-stop-tokenfilter-stop-words-by-lang] + +The following list contains supported language values for the `stopwords` parameter and a link to their predefined stop words in Lucene. + +$$$arabic-stop-words$$$ + +`_arabic_` +: [Arabic stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt) + +$$$armenian-stop-words$$$ + +`_armenian_` +: [Armenian stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/hy/stopwords.txt) + +$$$basque-stop-words$$$ + +`_basque_` +: [Basque stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/eu/stopwords.txt) + +$$$bengali-stop-words$$$ + +`_bengali_` +: [Bengali stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt) + +$$$brazilian-stop-words$$$ + +`_brazilian_` (Brazilian Portuguese) +: [Brazilian Portuguese stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/br/stopwords.txt) + +$$$bulgarian-stop-words$$$ + +`_bulgarian_` +: [Bulgarian stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt) + +$$$catalan-stop-words$$$ + +`_catalan_` +: [Catalan stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ca/stopwords.txt) + +$$$cjk-stop-words$$$ + +`_cjk_` (Chinese, Japanese, and Korean) +: [CJK stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/cjk/stopwords.txt) + +$$$czech-stop-words$$$ + +`_czech_` +: [Czech stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/cz/stopwords.txt) + +$$$danish-stop-words$$$ + +`_danish_` +: [Danish stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/danish_stop.txt) + +$$$dutch-stop-words$$$ + +`_dutch_` +: [Dutch stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/dutch_stop.txt) + +$$$english-stop-words$$$ + +`_english_` +: [English stop 
words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java#L48) + +$$$estonian-stop-words$$$ + +`_estonian_` +: [Estonian stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/et/stopwords.txt) + +$$$finnish-stop-words$$$ + +`_finnish_` +: [Finnish stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/finnish_stop.txt) + +$$$french-stop-words$$$ + +`_french_` +: [French stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/french_stop.txt) + +$$$galician-stop-words$$$ + +`_galician_` +: [Galician stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/gl/stopwords.txt) + +$$$german-stop-words$$$ + +`_german_` +: [German stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/german_stop.txt) + +$$$greek-stop-words$$$ + +`_greek_` +: [Greek stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/el/stopwords.txt) + +$$$hindi-stop-words$$$ + +`_hindi_` +: [Hindi stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt) + +$$$hungarian-stop-words$$$ + +`_hungarian_` +: [Hungarian stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/hungarian_stop.txt) + +$$$indonesian-stop-words$$$ + +`_indonesian_` +: [Indonesian stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/id/stopwords.txt) + +$$$irish-stop-words$$$ + +`_irish_` +: [Irish stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ga/stopwords.txt) + +$$$italian-stop-words$$$ + +`_italian_` +: [Italian stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/italian_stop.txt) + +$$$latvian-stop-words$$$ + +`_latvian_` +: [Latvian stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/lv/stopwords.txt) + +$$$lithuanian-stop-words$$$ + +`_lithuanian_` +: [Lithuanian stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/lt/stopwords.txt) + +$$$norwegian-stop-words$$$ + +`_norwegian_` +: [Norwegian stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/norwegian_stop.txt) + +$$$persian-stop-words$$$ + +`_persian_` +: [Persian stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt) + +$$$portuguese-stop-words$$$ + +`_portuguese_` +: [Portuguese stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/portuguese_stop.txt) + +$$$romanian-stop-words$$$ + +`_romanian_` +: [Romanian stop 
words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt) + +$$$russian-stop-words$$$ + +`_russian_` +: [Russian stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/russian_stop.txt) + +$$$serbian-stop-words$$$ + +`_serbian_` +: [Serbian stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/sr/stopwords.txt) + +$$$sorani-stop-words$$$ + +`_sorani_` +: [Sorani stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ckb/stopwords.txt) + +$$$spanish-stop-words$$$ + +`_spanish_` +: [Spanish stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/spanish_stop.txt) + +$$$swedish-stop-words$$$ + +`_swedish_` +: [Swedish stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/snowball/swedish_stop.txt) + +$$$thai-stop-words$$$ + +`_thai_` +: [Thai stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/th/stopwords.txt) + +$$$turkish-stop-words$$$ + +`_turkish_` +: [Turkish stop words](https://github.com/apache/lucene/blob/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/tr/stopwords.txt) + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-synonym-graph-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-synonym-graph-tokenfilter.md new file mode 100644 index 0000000000000..238b447cd450b --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-synonym-graph-tokenfilter.md @@ -0,0 +1,235 @@ +--- +navigation_title: "Synonym graph" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-synonym-graph-tokenfilter.html +--- + +# Synonym graph token filter [analysis-synonym-graph-tokenfilter] + + +The `synonym_graph` token filter allows to easily handle [synonyms](docs-content://solutions/search/full-text/search-with-synonyms.md), including multi-word synonyms correctly during the analysis process. + +In order to properly handle multi-word synonyms this token filter creates a [graph token stream](docs-content://manage-data/data-store/text-analysis/token-graphs.md) during processing. For more information on this topic and its various complexities, please read the [Lucene’s TokenStreams are actually graphs](http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.md) blog post. + +::::{note} +:name: synonym-graph-index-note + +This token filter is designed to be used as part of a search analyzer only. If you want to apply synonyms during indexing please use the standard [synonym token filter](/reference/data-analysis/text-analysis/analysis-synonym-tokenfilter.md). + +:::: + + + +## Define synonyms sets [analysis-synonym-graph-define-synonyms] + +Synonyms in a synonyms set are defined using **synonym rules**. Each synonym rule contains words that are synonyms. + +You can use two formats to define synonym rules: Solr and WordNet. + + +### Solr format [_solr_format_2] + +This format uses two different definitions: + +* Equivalent synonyms: Define groups of words that are equivalent. Words are separated by commas. 
Example: + + ```text + ipod, i-pod, i pod + computer, pc, laptop + ``` + +* Explicit synonyms: Matches a group of words to other words. Words on the left hand side of the rule definition are expanded into all the possibilities described on the right hand side. Example: + + ```text + personal computer => pc + sea biscuit, sea biscit => seabiscuit + ``` + + + +### WordNet format [_wordnet_format_2] + +[WordNet](https://wordnet.princeton.edu/) defines synonyms sets spanning multiple lines. Each line contains the following information: + +* Synonyms set numeric identifier +* Ordinal of the synonym in the synonyms set +* Synonym word +* Word type identifier: Noun (n), verb (v), adjective (a) or adverb (b). +* Depth of the word in the synonym net + +The following example defines a synonym set for the words "come", "advance" and "approach": + +```text +s(100000002,1,'come',v,1,0). +s(100000002,2,'advance',v,1,0). +s(100000002,3,'approach',v,1,0)."""; +``` + + +## Configure synonyms sets [analysis-synonym-graph-configure-sets] + +Synonyms can be configured using the [synonyms API](docs-content://solutions/search/full-text/search-with-synonyms.md#synonyms-store-synonyms-api), a [synonyms file](docs-content://solutions/search/full-text/search-with-synonyms.md#synonyms-store-synonyms-file), or directly [inlined](docs-content://solutions/search/full-text/search-with-synonyms.md#synonyms-store-synonyms-inline) in the token filter configuration. See [store your synonyms set](docs-content://solutions/search/full-text/search-with-synonyms.md#synonyms-store-synonyms) for more details on each option. + +Use `synonyms_set` configuration option to provide a synonym set created via Synonyms Management APIs: + +```JSON + "filter": { + "synonyms_filter": { + "type": "synonym_graph", + "synonyms_set": "my-synonym-set", + "updateable": true + } + } +``` + +::::{warning} +Synonyms sets must exist before they can be added to indices. If an index is created referencing a nonexistent synonyms set, the index will remain in a partially created and inoperable state. The only way to recover from this scenario is to ensure the synonyms set exists then either delete and re-create the index, or close and re-open the index. + +:::: + + +Use `synonyms_path` to provide a synonym file : + +```JSON + "filter": { + "synonyms_filter": { + "type": "synonym_graph", + "synonyms_path": "analysis/synonym-set.txt" + } + } +``` + +The above configures a `synonym` filter, with a path of `analysis/synonym-set.txt` (relative to the `config` location). + +Use `synonyms` to define inline synonyms: + +```JSON + "filter": { + "synonyms_filter": { + "type": "synonym_graph", + "synonyms": ["pc => personal computer", "computer, pc, laptop"] + } + } +``` + +Additional settings are: + +* `updateable` (defaults to `false`). If `true` allows [reloading](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-reload-search-analyzers) search analyzers to pick up changes to synonym files. Only to be used for search analyzers. +* `expand` (defaults to `true`). Expands definitions for equivalent synonym rules. See [expand equivalent synonyms](#synonym-graph-tokenizer-expand-equivalent-synonyms). +* `lenient` (defaults to the value of the `updateable` setting). If `true` ignores errors while parsing the synonym rules. It is important to note that only those synonym rules which cannot get parsed are ignored. 
See [synonyms and stop token filters](#synonym-graph-tokenizer-stop-token-filter) for an example of `lenient` behaviour for invalid synonym rules.
+
+
+### `expand` equivalent synonym rules [synonym-graph-tokenizer-expand-equivalent-synonyms]
+
+The `expand` parameter controls whether to expand equivalent synonym rules. Consider a synonym defined like:
+
+`foo, bar, baz`
+
+Using `expand: true`, the synonym rule would be expanded into:
+
+```
+foo => foo
+foo => bar
+foo => baz
+bar => foo
+bar => bar
+bar => baz
+baz => foo
+baz => bar
+baz => baz
+```
+
+When `expand` is set to `false`, the synonym rule is not expanded and the first synonym is treated as the canonical representation. The synonym would be equivalent to:
+
+```
+foo => foo
+bar => foo
+baz => foo
+```
+
+The `expand` parameter does not affect explicit synonym rules, like `foo, bar => baz`.
+
+
+### `tokenizer` and `ignore_case` are deprecated [synonym-graph-tokenizer-ignore_case-deprecated]
+
+The `tokenizer` parameter controls the tokenizers that will be used to tokenize the synonym entries. This parameter is only for backwards compatibility with indices created before 6.0. The `ignore_case` parameter works with the `tokenizer` parameter only.
+
+
+## Configure analyzers with synonym graph token filters [analysis-synonym-graph-analizers-configure]
+
+To apply synonyms, you will need to include a synonym graph token filter in an analyzer:
+
+```JSON
+  "analyzer": {
+    "my_analyzer": {
+      "type": "custom",
+      "tokenizer": "standard",
+      "filter": ["stemmer", "synonym_graph"]
+    }
+  }
+```
+
+
+### Token filters ordering [analysis-synonym-graph-token-order]
+
+Order is important for your token filters. Text will be processed first through filters preceding the synonym filter before being processed by the synonym filter.
+
+{{es}} will also use the token filters preceding the synonym filter in a tokenizer chain to parse the entries in a synonym file or synonym set. In the above example, the synonyms graph token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries.
+
+Because entries in the synonym map cannot have stacked positions, some token filters may cause issues here. Token filters that produce multiple versions of a token may choose which version of the token to emit when parsing synonyms. For example, `asciifolding` will only produce the folded version of the token. Others, like `multiplexer`, `word_delimiter_graph` or `ngram` will throw an error.
+
+If you need to build analyzers that include both multi-token filters and synonym filters, consider using the [multiplexer](/reference/data-analysis/text-analysis/analysis-multiplexer-tokenfilter.md) filter, with the multi-token filters in one branch and the synonym filter in the other.
+
+
+### Synonyms and `stop` token filters [synonym-graph-tokenizer-stop-token-filter]
+
+Synonyms and [stop token filters](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) interact with each other in the following ways:
+
+
+#### Stop token filter **before** synonym token filter [_stop_token_filter_before_synonym_token_filter_2]
+
+Stop words will be removed from the synonym rule definition. This can cause errors in the synonym rule.
+
+::::{warning}
+If `lenient` is set to `false`, invalid synonym rules can cause errors when applying analyzer changes. For reloadable analyzers, this prevents reloading and applying changes. You must correct errors in the synonym rules and reload the analyzer.
+ +When `lenient` is set to `false`, an index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the node assigned shards) + +:::: + + +For **explicit synonym rules** like `foo, bar => baz` with a stop filter that removes `bar`: + +* If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the left hand side of the synonym rule. +* If `lenient` is set to `true`, the rule `foo => baz` will be added and `bar => baz` will be ignored. + +If the stop filter removed `baz` instead: + +* If `lenient` is set to `false`, an error will be raised as `baz` would be removed from the right hand side of the synonym rule. +* If `lenient` is set to `true`, the synonym will have no effect as the target word is removed. + +For **equivalent synonym rules** like `foo, bar, baz` and `expand: true, with a stop filter that removes `bar`: + +* If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the synonym rule. +* If `lenient` is set to `true`, the synonyms added would be equivalent to the following synonym rules, which do not contain the removed word: + +``` +foo => foo +foo => baz +baz => foo +baz => baz +``` + + +#### Stop token filter **after** synonym token filter [_stop_token_filter_after_synonym_token_filter_2] + +The stop filter will remove the terms from the resulting synonym expansion. + +For example, a synonym rule like `foo, bar => baz` and a stop filter that removes `baz` will get no matches for `foo` or `bar`, as both would get expanded to `baz` which is removed by the stop filter. + +If the stop filter removed `foo` instead, then searching for `foo` would get expanded to `baz`, which is not removed by the stop filter thus potentially providing matches for `baz`. + diff --git a/docs/reference/data-analysis/text-analysis/analysis-synonym-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-synonym-tokenfilter.md new file mode 100644 index 0000000000000..bb293ac0c97cf --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-synonym-tokenfilter.md @@ -0,0 +1,225 @@ +--- +navigation_title: "Synonym" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-synonym-tokenfilter.html +--- + +# Synonym token filter [analysis-synonym-tokenfilter] + + +The `synonym` token filter allows to easily handle [synonyms](docs-content://solutions/search/full-text/search-with-synonyms.md) during the analysis process. + + +## Define synonyms sets [analysis-synonym-define-synonyms] + +Synonyms in a synonyms set are defined using **synonym rules**. Each synonym rule contains words that are synonyms. + +You can use two formats to define synonym rules: Solr and WordNet. + + +### Solr format [_solr_format] + +This format uses two different definitions: + +* Equivalent synonyms: Define groups of words that are equivalent. Words are separated by commas. Example: + + ```text + ipod, i-pod, i pod + computer, pc, laptop + ``` + +* Explicit synonyms: Matches a group of words to other words. Words on the left hand side of the rule definition are expanded into all the possibilities described on the right hand side. Example: + + ```text + personal computer => pc + sea biscuit, sea biscit => seabiscuit + ``` + + + +### WordNet format [_wordnet_format] + +[WordNet](https://wordnet.princeton.edu/) defines synonyms sets spanning multiple lines. 
Each line contains the following information: + +* Synonyms set numeric identifier +* Ordinal of the synonym in the synonyms set +* Synonym word +* Word type identifier: Noun (n), verb (v), adjective (a) or adverb (b). +* Depth of the word in the synonym net + +The following example defines a synonym set for the words "come", "advance" and "approach": + +```text +s(100000002,1,'come',v,1,0). +s(100000002,2,'advance',v,1,0). +s(100000002,3,'approach',v,1,0)."""; +``` + + +## Configure synonyms sets [analysis-synonym-configure-sets] + +Synonyms can be configured using the [synonyms API](docs-content://solutions/search/full-text/search-with-synonyms.md#synonyms-store-synonyms-api), a [synonyms file](docs-content://solutions/search/full-text/search-with-synonyms.md#synonyms-store-synonyms-file), or directly [inlined](docs-content://solutions/search/full-text/search-with-synonyms.md#synonyms-store-synonyms-inline) in the token filter configuration. See [store your synonyms set](docs-content://solutions/search/full-text/search-with-synonyms.md#synonyms-store-synonyms) for more details on each option. + +Use `synonyms_set` configuration option to provide a synonym set created via Synonyms Management APIs: + +```JSON + "filter": { + "synonyms_filter": { + "type": "synonym", + "synonyms_set": "my-synonym-set", + "updateable": true + } + } +``` + +::::{warning} +Synonyms sets must exist before they can be added to indices. If an index is created referencing a nonexistent synonyms set, the index will remain in a partially created and inoperable state. The only way to recover from this scenario is to ensure the synonyms set exists then either delete and re-create the index, or close and re-open the index. + +:::: + + +Use `synonyms_path` to provide a synonym file : + +```JSON + "filter": { + "synonyms_filter": { + "type": "synonym", + "synonyms_path": "analysis/synonym-set.txt" + } + } +``` + +The above configures a `synonym` filter, with a path of `analysis/synonym-set.txt` (relative to the `config` location). + +Use `synonyms` to define inline synonyms: + +```JSON + "filter": { + "synonyms_filter": { + "type": "synonym", + "synonyms": ["pc => personal computer", "computer, pc, laptop"] + } + } +``` + +Additional settings are: + +* `updateable` (defaults to `false`). If `true` allows [reloading](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-reload-search-analyzers) search analyzers to pick up changes to synonym files. Only to be used for search analyzers. +* `expand` (defaults to `true`). Expands definitions for equivalent synonym rules. See [expand equivalent synonyms](#synonym-tokenizer-expand-equivalent-synonyms). +* `lenient` (defaults to the value of the `updateable` setting). If `true` ignores errors while parsing the synonym rules. It is important to note that only those synonym rules which cannot get parsed are ignored. See [synonyms and stop token filters](#synonym-tokenizer-stop-token-filter) for an example of `lenient` behaviour for invalid synonym rules. + + +### `expand` equivalent synonym rules [synonym-tokenizer-expand-equivalent-synonyms] + +The `expand` parameter controls whether to expand equivalent synonym rules. 
Consider a synonym defined like:
+
+`foo, bar, baz`
+
+Using `expand: true`, the synonym rule would be expanded into:
+
+```
+foo => foo
+foo => bar
+foo => baz
+bar => foo
+bar => bar
+bar => baz
+baz => foo
+baz => bar
+baz => baz
+```
+
+When `expand` is set to `false`, the synonym rule is not expanded and the first synonym is treated as the canonical representation. The synonym would be equivalent to:
+
+```
+foo => foo
+bar => foo
+baz => foo
+```
+
+The `expand` parameter does not affect explicit synonym rules, like `foo, bar => baz`.
+
+
+### `tokenizer` and `ignore_case` are deprecated [synonym-tokenizer-ignore_case-deprecated]
+
+The `tokenizer` parameter controls the tokenizers that will be used to tokenize the synonym entries. This parameter is only for backwards compatibility with indices created before 6.0. The `ignore_case` parameter works with the `tokenizer` parameter only.
+
+
+## Configure analyzers with synonym token filters [analysis-synonym-analizers-configure]
+
+To apply synonyms, you will need to include a synonym token filter in an analyzer:
+
+```JSON
+  "analyzer": {
+    "my_analyzer": {
+      "type": "custom",
+      "tokenizer": "standard",
+      "filter": ["stemmer", "synonym"]
+    }
+  }
+```
+
+
+### Token filters ordering [analysis-synonym-token-order]
+
+Order is important for your token filters. Text will be processed first through filters preceding the synonym filter before being processed by the synonym filter.
+
+{{es}} will also use the token filters preceding the synonym filter in a tokenizer chain to parse the entries in a synonym file or synonym set. In the above example, the synonyms token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries.
+
+Because entries in the synonym map cannot have stacked positions, some token filters may cause issues here. Token filters that produce multiple versions of a token may choose which version of the token to emit when parsing synonyms. For example, `asciifolding` will only produce the folded version of the token. Others, like `multiplexer`, `word_delimiter_graph` or `ngram` will throw an error.
+
+If you need to build analyzers that include both multi-token filters and synonym filters, consider using the [multiplexer](/reference/data-analysis/text-analysis/analysis-multiplexer-tokenfilter.md) filter, with the multi-token filters in one branch and the synonym filter in the other.
+
+
+### Synonyms and `stop` token filters [synonym-tokenizer-stop-token-filter]
+
+Synonyms and [stop token filters](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) interact with each other in the following ways:
+
+
+#### Stop token filter **before** synonym token filter [_stop_token_filter_before_synonym_token_filter]
+
+Stop words will be removed from the synonym rule definition. This can cause errors in the synonym rule.
+
+::::{warning}
+If `lenient` is set to `false`, invalid synonym rules can cause errors when applying analyzer changes. For reloadable analyzers, this prevents reloading and applying changes. You must correct errors in the synonym rules and reload the analyzer.
+ +When `lenient` is set to `false`, an index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the node assigned shards) + +:::: + + +For **explicit synonym rules** like `foo, bar => baz` with a stop filter that removes `bar`: + +* If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the left hand side of the synonym rule. +* If `lenient` is set to `true`, the rule `foo => baz` will be added and `bar => baz` will be ignored. + +If the stop filter removed `baz` instead: + +* If `lenient` is set to `false`, an error will be raised as `baz` would be removed from the right hand side of the synonym rule. +* If `lenient` is set to `true`, the synonym will have no effect as the target word is removed. + +For **equivalent synonym rules** like `foo, bar, baz` and `expand: true, with a stop filter that removes `bar`: + +* If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the synonym rule. +* If `lenient` is set to `true`, the synonyms added would be equivalent to the following synonym rules, which do not contain the removed word: + +``` +foo => foo +foo => baz +baz => foo +baz => baz +``` + + +#### Stop token filter **after** synonym token filter [_stop_token_filter_after_synonym_token_filter] + +The stop filter will remove the terms from the resulting synonym expansion. + +For example, a synonym rule like `foo, bar => baz` and a stop filter that removes `baz` will get no matches for `foo` or `bar`, as both would get expanded to `baz` which is removed by the stop filter. + +If the stop filter removed `foo` instead, then searching for `foo` would get expanded to `baz`, which is not removed by the stop filter thus potentially providing matches for `baz`. + diff --git a/docs/reference/data-analysis/text-analysis/analysis-thai-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-thai-tokenizer.md new file mode 100644 index 0000000000000..288427d6dae08 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-thai-tokenizer.md @@ -0,0 +1,38 @@ +--- +navigation_title: "Thai" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-thai-tokenizer.html +--- + +# Thai tokenizer [analysis-thai-tokenizer] + + +The `thai` tokenizer segments Thai text into words, using the Thai segmentation algorithm included with Java. Text in other languages in general will be treated the same as the [`standard` tokenizer](/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md). + +::::{warning} +This tokenizer may not be supported by all JREs. It is known to work with Sun/Oracle and OpenJDK. If your application needs to be fully portable, consider using the [ICU Tokenizer](/reference/elasticsearch-plugins/analysis-icu-tokenizer.md) instead. +:::: + + + +## Example output [_example_output_17] + +```console +POST _analyze +{ + "tokenizer": "thai", + "text": "การที่ได้ต้องแสดงว่างานดี" +} +``` + +The above sentence would produce the following terms: + +```text +[ การ, ที่, ได้, ต้อง, แสดง, ว่า, งาน, ดี ] +``` + + +## Configuration [_configuration_20] + +The `thai` tokenizer is not configurable. 
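+
+Although the tokenizer itself takes no parameters, it can still be referenced from a custom analyzer. The following is a minimal sketch (the `thai_example` index name and `rebuilt_thai` analyzer name are illustrative only) that pairs the `thai` tokenizer with a `lowercase` filter and then analyzes the sample sentence from the example above:
+
+```console
+PUT thai_example
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "rebuilt_thai": {
+          "tokenizer": "thai",
+          "filter": [ "lowercase" ]
+        }
+      }
+    }
+  }
+}
+
+POST thai_example/_analyze
+{
+  "analyzer": "rebuilt_thai",
+  "text": "การที่ได้ต้องแสดงว่างานดี"
+}
+```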
+ diff --git a/docs/reference/data-analysis/text-analysis/analysis-trim-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-trim-tokenfilter.md new file mode 100644 index 0000000000000..fb29e2c8741fc --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-trim-tokenfilter.md @@ -0,0 +1,98 @@ +--- +navigation_title: "Trim" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-trim-tokenfilter.html +--- + +# Trim token filter [analysis-trim-tokenfilter] + + +Removes leading and trailing whitespace from each token in a stream. While this can change the length of a token, the `trim` filter does *not* change a token’s offsets. + +The `trim` filter uses Lucene’s [TrimFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/TrimFilter.md). + +::::{tip} +Many commonly used tokenizers, such as the [`standard`](/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md) or [`whitespace`](/reference/data-analysis/text-analysis/analysis-whitespace-tokenizer.md) tokenizer, remove whitespace by default. When using these tokenizers, you don’t need to add a separate `trim` filter. + +:::: + + +## Example [analysis-trim-tokenfilter-analyze-ex] + +To see how the `trim` filter works, you first need to produce a token containing whitespace. + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the [`keyword`](/reference/data-analysis/text-analysis/analysis-keyword-tokenizer.md) tokenizer to produce a token for `" fox "`. + +```console +GET _analyze +{ + "tokenizer" : "keyword", + "text" : " fox " +} +``` + +The API returns the following response. Note the `" fox "` token contains the original text’s whitespace. Note that despite changing the token’s length, the `start_offset` and `end_offset` remain the same. + +```console-result +{ + "tokens": [ + { + "token": " fox ", + "start_offset": 0, + "end_offset": 5, + "type": "word", + "position": 0 + } + ] +} +``` + +To remove the whitespace, add the `trim` filter to the previous analyze API request. + +```console +GET _analyze +{ + "tokenizer" : "keyword", + "filter" : ["trim"], + "text" : " fox " +} +``` + +The API returns the following response. The returned `fox` token does not include any leading or trailing whitespace. + +```console-result +{ + "tokens": [ + { + "token": "fox", + "start_offset": 0, + "end_offset": 5, + "type": "word", + "position": 0 + } + ] +} +``` + + +## Add to an analyzer [analysis-trim-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `trim` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). 
+ +```console +PUT trim_example +{ + "settings": { + "analysis": { + "analyzer": { + "keyword_trim": { + "tokenizer": "keyword", + "filter": [ "trim" ] + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-truncate-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-truncate-tokenfilter.md new file mode 100644 index 0000000000000..fb923c1acffe6 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-truncate-tokenfilter.md @@ -0,0 +1,91 @@ +--- +navigation_title: "Truncate" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-truncate-tokenfilter.html +--- + +# Truncate token filter [analysis-truncate-tokenfilter] + + +Truncates tokens that exceed a specified character limit. This limit defaults to `10` but can be customized using the `length` parameter. + +For example, you can use the `truncate` filter to shorten all tokens to `3` characters or fewer, changing `jumping fox` to `jum fox`. + +This filter uses Lucene’s [TruncateTokenFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.md). + +## Example [analysis-truncate-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `truncate` filter to shorten tokens that exceed 10 characters in `the quinquennial extravaganza carried on`: + +```console +GET _analyze +{ + "tokenizer" : "whitespace", + "filter" : ["truncate"], + "text" : "the quinquennial extravaganza carried on" +} +``` + +The filter produces the following tokens: + +```text +[ the, quinquenni, extravagan, carried, on ] +``` + + +## Add to an analyzer [analysis-truncate-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `truncate` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT custom_truncate_example +{ + "settings" : { + "analysis" : { + "analyzer" : { + "standard_truncate" : { + "tokenizer" : "standard", + "filter" : ["truncate"] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-truncate-tokenfilter-configure-parms] + +`length` +: (Optional, integer) Character limit for each token. Tokens exceeding this limit are truncated. Defaults to `10`. + + +## Customize [analysis-truncate-tokenfilter-customize] + +To customize the `truncate` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. 
+ +For example, the following request creates a custom `truncate` filter, `5_char_trunc`, that shortens tokens to a `length` of `5` or fewer characters: + +```console +PUT 5_char_words_example +{ + "settings": { + "analysis": { + "analyzer": { + "lowercase_5_char": { + "tokenizer": "lowercase", + "filter": [ "5_char_trunc" ] + } + }, + "filter": { + "5_char_trunc": { + "type": "truncate", + "length": 5 + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-uaxurlemail-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-uaxurlemail-tokenizer.md new file mode 100644 index 0000000000000..579738a4f39eb --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-uaxurlemail-tokenizer.md @@ -0,0 +1,80 @@ +--- +navigation_title: "UAX URL email" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-uaxurlemail-tokenizer.html +--- + +# UAX URL email tokenizer [analysis-uaxurlemail-tokenizer] + + +The `uax_url_email` tokenizer is like the [`standard` tokenizer](/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md) except that it recognises URLs and email addresses as single tokens. + + +## Example output [_example_output_18] + +```console +POST _analyze +{ + "tokenizer": "uax_url_email", + "text": "Email me at john.smith@global-international.com" +} +``` + +The above sentence would produce the following terms: + +```text +[ Email, me, at, john.smith@global-international.com ] +``` + +while the `standard` tokenizer would produce: + +```text +[ Email, me, at, john.smith, global, international.com ] +``` + + +## Configuration [_configuration_21] + +The `uax_url_email` tokenizer accepts the following parameters: + +`max_token_length` +: The maximum token length. If a token is seen that exceeds this length then it is split at `max_token_length` intervals. Defaults to `255`. + + +## Example configuration [_example_configuration_14] + +In this example, we configure the `uax_url_email` tokenizer to have a `max_token_length` of 5 (for demonstration purposes): + +```console +PUT my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "my_tokenizer" + } + }, + "tokenizer": { + "my_tokenizer": { + "type": "uax_url_email", + "max_token_length": 5 + } + } + } + } +} + +POST my-index-000001/_analyze +{ + "analyzer": "my_analyzer", + "text": "john.smith@global-international.com" +} +``` + +The above example produces the following terms: + +```text +[ john, smith, globa, l, inter, natio, nal.c, om ] +``` + diff --git a/docs/reference/data-analysis/text-analysis/analysis-unique-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-unique-tokenfilter.md new file mode 100644 index 0000000000000..6d69f6c9fa992 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-unique-tokenfilter.md @@ -0,0 +1,95 @@ +--- +navigation_title: "Unique" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-unique-tokenfilter.html +--- + +# Unique token filter [analysis-unique-tokenfilter] + + +Removes duplicate tokens from a stream. For example, you can use the `unique` filter to change `the lazy lazy dog` to `the lazy dog`. + +If the `only_on_same_position` parameter is set to `true`, the `unique` filter removes only duplicate tokens *in the same position*. 
+ +::::{note} +When `only_on_same_position` is `true`, the `unique` filter works the same as [`remove_duplicates`](/reference/data-analysis/text-analysis/analysis-remove-duplicates-tokenfilter.md) filter. + +:::: + + +## Example [analysis-unique-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `unique` filter to remove duplicate tokens from `the quick fox jumps the lazy fox`: + +```console +GET _analyze +{ + "tokenizer" : "whitespace", + "filter" : ["unique"], + "text" : "the quick fox jumps the lazy fox" +} +``` + +The filter removes duplicated tokens for `the` and `fox`, producing the following output: + +```text +[ the, quick, fox, jumps, lazy ] +``` + + +## Add to an analyzer [analysis-unique-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `unique` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT custom_unique_example +{ + "settings" : { + "analysis" : { + "analyzer" : { + "standard_truncate" : { + "tokenizer" : "standard", + "filter" : ["unique"] + } + } + } + } +} +``` + + +## Configurable parameters [analysis-unique-tokenfilter-configure-parms] + +`only_on_same_position` +: (Optional, Boolean) If `true`, only remove duplicate tokens in the same position. Defaults to `false`. + + +## Customize [analysis-unique-tokenfilter-customize] + +To customize the `unique` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following request creates a custom `unique` filter with `only_on_same_position` set to `true`. + +```console +PUT letter_unique_pos_example +{ + "settings": { + "analysis": { + "analyzer": { + "letter_unique_pos": { + "tokenizer": "letter", + "filter": [ "unique_pos" ] + } + }, + "filter": { + "unique_pos": { + "type": "unique", + "only_on_same_position": true + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-uppercase-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-uppercase-tokenfilter.md new file mode 100644 index 0000000000000..40022c0d6d40f --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-uppercase-tokenfilter.md @@ -0,0 +1,62 @@ +--- +navigation_title: "Uppercase" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-uppercase-tokenfilter.html +--- + +# Uppercase token filter [analysis-uppercase-tokenfilter] + + +Changes token text to uppercase. For example, you can use the `uppercase` filter to change `the Lazy DoG` to `THE LAZY DOG`. + +This filter uses Lucene’s [UpperCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/core/UpperCaseFilter.md). + +::::{warning} +Depending on the language, an uppercase character can map to multiple lowercase characters. Using the `uppercase` filter could result in the loss of lowercase character information. + +To avoid this loss but still have a consistent letter case, use the [`lowercase`](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md) filter instead. 
+ +:::: + + +## Example [analysis-uppercase-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the default `uppercase` filter to change the `the Quick FoX JUMPs` to uppercase: + +```console +GET _analyze +{ + "tokenizer" : "standard", + "filter" : ["uppercase"], + "text" : "the Quick FoX JUMPs" +} +``` + +The filter produces the following tokens: + +```text +[ THE, QUICK, FOX, JUMPS ] +``` + + +## Add to an analyzer [analysis-uppercase-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `uppercase` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT uppercase_example +{ + "settings": { + "analysis": { + "analyzer": { + "whitespace_uppercase": { + "tokenizer": "whitespace", + "filter": [ "uppercase" ] + } + } + } + } +} +``` + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-whitespace-analyzer.md b/docs/reference/data-analysis/text-analysis/analysis-whitespace-analyzer.md new file mode 100644 index 0000000000000..361ec7ba003d4 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-whitespace-analyzer.md @@ -0,0 +1,64 @@ +--- +navigation_title: "Whitespace" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-whitespace-analyzer.html +--- + +# Whitespace analyzer [analysis-whitespace-analyzer] + + +The `whitespace` analyzer breaks text into terms whenever it encounters a whitespace character. + + +## Example output [_example_output_6] + +```console +POST _analyze +{ + "analyzer": "whitespace", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The above sentence would produce the following terms: + +```text +[ The, 2, QUICK, Brown-Foxes, jumped, over, the, lazy, dog's, bone. ] +``` + + +## Configuration [_configuration_7] + +The `whitespace` analyzer is not configurable. + + +## Definition [_definition_6] + +It consists of: + +Tokenizer +: * [Whitespace Tokenizer](/reference/data-analysis/text-analysis/analysis-whitespace-tokenizer.md) + + +If you need to customize the `whitespace` analyzer then you need to recreate it as a `custom` analyzer and modify it, usually by adding token filters. This would recreate the built-in `whitespace` analyzer and you can use it as a starting point for further customization: + +```console +PUT /whitespace_example +{ + "settings": { + "analysis": { + "analyzer": { + "rebuilt_whitespace": { + "tokenizer": "whitespace", + "filter": [ <1> + ] + } + } + } + } +} +``` + +1. You’d add any token filters here. + + diff --git a/docs/reference/data-analysis/text-analysis/analysis-whitespace-tokenizer.md b/docs/reference/data-analysis/text-analysis/analysis-whitespace-tokenizer.md new file mode 100644 index 0000000000000..bc7a9b0b74626 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-whitespace-tokenizer.md @@ -0,0 +1,36 @@ +--- +navigation_title: "Whitespace" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-whitespace-tokenizer.html +--- + +# Whitespace tokenizer [analysis-whitespace-tokenizer] + + +The `whitespace` tokenizer breaks text into terms whenever it encounters a whitespace character. 
+ + +## Example output [_example_output_19] + +```console +POST _analyze +{ + "tokenizer": "whitespace", + "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." +} +``` + +The above sentence would produce the following terms: + +```text +[ The, 2, QUICK, Brown-Foxes, jumped, over, the, lazy, dog's, bone. ] +``` + + +## Configuration [_configuration_22] + +The `whitespace` tokenizer accepts the following parameters: + +`max_token_length` +: The maximum token length. If a token is seen that exceeds this length then it is split at `max_token_length` intervals. Defaults to `255`. + diff --git a/docs/reference/data-analysis/text-analysis/analysis-word-delimiter-graph-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-word-delimiter-graph-tokenfilter.md new file mode 100644 index 0000000000000..d6065203f2374 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-word-delimiter-graph-tokenfilter.md @@ -0,0 +1,321 @@ +--- +navigation_title: "Word delimiter graph" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-word-delimiter-graph-tokenfilter.html +--- + +# Word delimiter graph token filter [analysis-word-delimiter-graph-tokenfilter] + + +Splits tokens at non-alphanumeric characters. The `word_delimiter_graph` filter also performs optional token normalization based on a set of rules. By default, the filter uses the following rules: + +* Split tokens at non-alphanumeric characters. The filter uses these characters as delimiters. For example: `Super-Duper` → `Super`, `Duper` +* Remove leading or trailing delimiters from each token. For example: `XL---42+'Autocoder'` → `XL`, `42`, `Autocoder` +* Split tokens at letter case transitions. For example: `PowerShot` → `Power`, `Shot` +* Split tokens at letter-number transitions. For example: `XL500` → `XL`, `500` +* Remove the English possessive (`'s`) from the end of each token. For example: `Neil's` → `Neil` + +The `word_delimiter_graph` filter uses Lucene’s [WordDelimiterGraphFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilter.md). + +::::{tip} +The `word_delimiter_graph` filter was designed to remove punctuation from complex identifiers, such as product IDs or part numbers. For these use cases, we recommend using the `word_delimiter_graph` filter with the [`keyword`](/reference/data-analysis/text-analysis/analysis-keyword-tokenizer.md) tokenizer. + +Avoid using the `word_delimiter_graph` filter to split hyphenated words, such as `wi-fi`. Because users often search for these words both with and without hyphens, we recommend using the [`synonym_graph`](/reference/data-analysis/text-analysis/analysis-synonym-graph-tokenfilter.md) filter instead. 
+ +:::: + + +## Example [analysis-word-delimiter-graph-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `word_delimiter_graph` filter to split `Neil's-Super-Duper-XL500--42+AutoCoder` into normalized tokens using the filter’s default rules: + +```console +GET /_analyze +{ + "tokenizer": "keyword", + "filter": [ "word_delimiter_graph" ], + "text": "Neil's-Super-Duper-XL500--42+AutoCoder" +} +``` + +The filter produces the following tokens: + +```txt +[ Neil, Super, Duper, XL, 500, 42, Auto, Coder ] +``` + + +## Add to an analyzer [analysis-word-delimiter-graph-tokenfilter-analyzer-ex] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `word_delimiter_graph` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "keyword", + "filter": [ "word_delimiter_graph" ] + } + } + } + } +} +``` + +::::{warning} +Avoid using the `word_delimiter_graph` filter with tokenizers that remove punctuation, such as the [`standard`](/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md) tokenizer. This could prevent the `word_delimiter_graph` filter from splitting tokens correctly. It can also interfere with the filter’s configurable parameters, such as [`catenate_all`](#word-delimiter-graph-tokenfilter-catenate-all) or [`preserve_original`](#word-delimiter-graph-tokenfilter-preserve-original). We recommend using the [`keyword`](/reference/data-analysis/text-analysis/analysis-keyword-tokenizer.md) or [`whitespace`](/reference/data-analysis/text-analysis/analysis-whitespace-tokenizer.md) tokenizer instead. + +:::: + + + +## Configurable parameters [word-delimiter-graph-tokenfilter-configure-parms] + +$$$word-delimiter-graph-tokenfilter-adjust-offsets$$$ + +`adjust_offsets` +: (Optional, Boolean) If `true`, the filter adjusts the offsets of split or catenated tokens to better reflect their actual position in the token stream. Defaults to `true`. + +::::{warning} +Set `adjust_offsets` to `false` if your analyzer uses filters, such as the [`trim`](/reference/data-analysis/text-analysis/analysis-trim-tokenfilter.md) filter, that change the length of tokens without changing their offsets. Otherwise, the `word_delimiter_graph` filter could produce tokens with illegal offsets. + +:::: + + + +$$$word-delimiter-graph-tokenfilter-catenate-all$$$ + +`catenate_all` +: (Optional, Boolean) If `true`, the filter produces catenated tokens for chains of alphanumeric characters separated by non-alphabetic delimiters. For example: `super-duper-xl-500` → [ **`superduperxl500`**, `super`, `duper`, `xl`, `500` ]. Defaults to `false`. + +::::{warning} +Setting this parameter to `true` produces multi-position tokens, which are not supported by indexing. + +If this parameter is `true`, avoid using this filter in an index analyzer or use the [`flatten_graph`](/reference/data-analysis/text-analysis/analysis-flatten-graph-tokenfilter.md) filter after this filter to make the token stream suitable for indexing. + +When used for search analysis, catenated tokens can cause problems for the [`match_phrase`](/reference/query-languages/query-dsl-match-query-phrase.md) query and other queries that rely on token position for matching. 
Avoid setting this parameter to `true` if you plan to use these queries. + +:::: + + + +$$$word-delimiter-graph-tokenfilter-catenate-numbers$$$ + +`catenate_numbers` +: (Optional, Boolean) If `true`, the filter produces catenated tokens for chains of numeric characters separated by non-alphabetic delimiters. For example: `01-02-03` → [ **`010203`**, `01`, `02`, `03` ]. Defaults to `false`. + +::::{warning} +Setting this parameter to `true` produces multi-position tokens, which are not supported by indexing. + +If this parameter is `true`, avoid using this filter in an index analyzer or use the [`flatten_graph`](/reference/data-analysis/text-analysis/analysis-flatten-graph-tokenfilter.md) filter after this filter to make the token stream suitable for indexing. + +When used for search analysis, catenated tokens can cause problems for the [`match_phrase`](/reference/query-languages/query-dsl-match-query-phrase.md) query and other queries that rely on token position for matching. Avoid setting this parameter to `true` if you plan to use these queries. + +:::: + + + +$$$word-delimiter-graph-tokenfilter-catenate-words$$$ + +`catenate_words` +: (Optional, Boolean) If `true`, the filter produces catenated tokens for chains of alphabetical characters separated by non-alphabetic delimiters. For example: `super-duper-xl` → [ **`superduperxl`**, `super`, `duper`, `xl` ]. Defaults to `false`. + +::::{warning} +Setting this parameter to `true` produces multi-position tokens, which are not supported by indexing. + +If this parameter is `true`, avoid using this filter in an index analyzer or use the [`flatten_graph`](/reference/data-analysis/text-analysis/analysis-flatten-graph-tokenfilter.md) filter after this filter to make the token stream suitable for indexing. + +When used for search analysis, catenated tokens can cause problems for the [`match_phrase`](/reference/query-languages/query-dsl-match-query-phrase.md) query and other queries that rely on token position for matching. Avoid setting this parameter to `true` if you plan to use these queries. + +:::: + + + +`generate_number_parts` +: (Optional, Boolean) If `true`, the filter includes tokens consisting of only numeric characters in the output. If `false`, the filter excludes these tokens from the output. Defaults to `true`. + +`generate_word_parts` +: (Optional, Boolean) If `true`, the filter includes tokens consisting of only alphabetical characters in the output. If `false`, the filter excludes these tokens from the output. Defaults to `true`. + +`ignore_keywords` +: (Optional, Boolean) If `true`, the filter skips tokens with a `keyword` attribute of `true`. Defaults to `false`. + +$$$word-delimiter-graph-tokenfilter-preserve-original$$$ + +`preserve_original` +: (Optional, Boolean) If `true`, the filter includes the original version of any split tokens in the output. This original version includes non-alphanumeric delimiters. For example: `super-duper-xl-500` → [ **`super-duper-xl-500`**, `super`, `duper`, `xl`, `500` ]. Defaults to `false`. + +::::{warning} +Setting this parameter to `true` produces multi-position tokens, which are not supported by indexing. + +If this parameter is `true`, avoid using this filter in an index analyzer or use the [`flatten_graph`](/reference/data-analysis/text-analysis/analysis-flatten-graph-tokenfilter.md) filter after this filter to make the token stream suitable for indexing. + +:::: + + + +`protected_words` +: (Optional, array of strings) Array of tokens the filter won’t split. 
+ +`protected_words_path` +: (Optional, string) Path to a file that contains a list of tokens the filter won’t split. + +This path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each token in the file must be separated by a line break. + + +`split_on_case_change` +: (Optional, Boolean) If `true`, the filter splits tokens at letter case transitions. For example: `camelCase` → [ `camel`, `Case` ]. Defaults to `true`. + +`split_on_numerics` +: (Optional, Boolean) If `true`, the filter splits tokens at letter-number transitions. For example: `j2se` → [ `j`, `2`, `se` ]. Defaults to `true`. + +`stem_english_possessive` +: (Optional, Boolean) If `true`, the filter removes the English possessive (`'s`) from the end of each token. For example: `O'Neil's` → [ `O`, `Neil` ]. Defaults to `true`. + +`type_table` +: (Optional, array of strings) Array of custom type mappings for characters. This allows you to map non-alphanumeric characters as numeric or alphanumeric to avoid splitting on those characters. + +For example, the following array maps the plus (`+`) and hyphen (`-`) characters as alphanumeric, which means they won’t be treated as delimiters: + +`[ "+ => ALPHA", "- => ALPHA" ]` + +Supported types include: + +* `ALPHA` (Alphabetical) +* `ALPHANUM` (Alphanumeric) +* `DIGIT` (Numeric) +* `LOWER` (Lowercase alphabetical) +* `SUBWORD_DELIM` (Non-alphanumeric delimiter) +* `UPPER` (Uppercase alphabetical) + + +`type_table_path` +: (Optional, string) Path to a file that contains custom type mappings for characters. This allows you to map non-alphanumeric characters as numeric or alphanumeric to avoid splitting on those characters. + +For example, the contents of this file may contain the following: + +```txt +# Map the $, %, '.', and ',' characters to DIGIT +# This might be useful for financial data. +$ => DIGIT +% => DIGIT +. => DIGIT +\\u002C => DIGIT + +# in some cases you might not want to split on ZWJ +# this also tests the case where we need a bigger byte[] +# see https://en.wikipedia.org/wiki/Zero-width_joiner +\\u200D => ALPHANUM +``` + +Supported types include: + +* `ALPHA` (Alphabetical) +* `ALPHANUM` (Alphanumeric) +* `DIGIT` (Numeric) +* `LOWER` (Lowercase alphabetical) +* `SUBWORD_DELIM` (Non-alphanumeric delimiter) +* `UPPER` (Uppercase alphabetical) + +This file path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each mapping in the file must be separated by a line break. + + + +## Customize [analysis-word-delimiter-graph-tokenfilter-customize] + +To customize the `word_delimiter_graph` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following request creates a `word_delimiter_graph` filter that uses the following rules: + +* Split tokens at non-alphanumeric characters, *except* the hyphen (`-`) character. +* Remove leading or trailing delimiters from each token. +* Do *not* split tokens at letter case transitions. +* Do *not* split tokens at letter-number transitions. +* Remove the English possessive (`'s`) from the end of each token. 
+ +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "keyword", + "filter": [ "my_custom_word_delimiter_graph_filter" ] + } + }, + "filter": { + "my_custom_word_delimiter_graph_filter": { + "type": "word_delimiter_graph", + "type_table": [ "- => ALPHA" ], + "split_on_case_change": false, + "split_on_numerics": false, + "stem_english_possessive": true + } + } + } + } +} +``` + + +## Differences between `word_delimiter_graph` and `word_delimiter` [analysis-word-delimiter-graph-differences] + +Both the `word_delimiter_graph` and [`word_delimiter`](/reference/data-analysis/text-analysis/analysis-word-delimiter-tokenfilter.md) filters produce tokens that span multiple positions when any of the following parameters are `true`: + +* [`catenate_all`](#word-delimiter-graph-tokenfilter-catenate-all) +* [`catenate_numbers`](#word-delimiter-graph-tokenfilter-catenate-numbers) +* [`catenate_words`](#word-delimiter-graph-tokenfilter-catenate-words) +* [`preserve_original`](#word-delimiter-graph-tokenfilter-preserve-original) + +However, only the `word_delimiter_graph` filter assigns multi-position tokens a `positionLength` attribute, which indicates the number of positions a token spans. This ensures the `word_delimiter_graph` filter always produces valid [token graphs](docs-content://manage-data/data-store/text-analysis/token-graphs.md). + +The `word_delimiter` filter does not assign multi-position tokens a `positionLength` attribute. This means it produces invalid graphs for streams including these tokens. + +While indexing does not support token graphs containing multi-position tokens, queries, such as the [`match_phrase`](/reference/query-languages/query-dsl-match-query-phrase.md) query, can use these graphs to generate multiple sub-queries from a single query string. + +To see how token graphs produced by the `word_delimiter` and `word_delimiter_graph` filters differ, check out the following example. + +:::::{dropdown} **Example** +$$$analysis-word-delimiter-graph-basic-token-graph$$$ +**Basic token graph** + +Both the `word_delimiter` and `word_delimiter_graph` produce the following token graph for `PowerShot2000` when the following parameters are `false`: + +* [`catenate_all`](#word-delimiter-graph-tokenfilter-catenate-all) +* [`catenate_numbers`](#word-delimiter-graph-tokenfilter-catenate-numbers) +* [`catenate_words`](#word-delimiter-graph-tokenfilter-catenate-words) +* [`preserve_original`](#word-delimiter-graph-tokenfilter-preserve-original) + +This graph does not contain multi-position tokens. All tokens span only one position. + +:::{image} ../../../images/token-graph-basic.svg +:alt: token graph basic +::: + +$$$analysis-word-delimiter-graph-wdg-token-graph$$$ +**`word_delimiter_graph` graph with a multi-position token** + +The `word_delimiter_graph` filter produces the following token graph for `PowerShot2000` when `catenate_words` is `true`. + +This graph correctly indicates the catenated `PowerShot` token spans two positions. + +:::{image} ../../../images/token-graph-wdg.svg +:alt: token graph wdg +::: + +$$$analysis-word-delimiter-graph-wd-token-graph$$$ +**`word_delimiter` graph with a multi-position token** + +When `catenate_words` is `true`, the `word_delimiter` filter produces the following token graph for `PowerShot2000`. + +Note that the catenated `PowerShot` token should span two positions but only spans one in the token graph, making it invalid. 
+ +:::{image} ../../../images/token-graph-wd.svg +:alt: token graph wd +::: + +::::: diff --git a/docs/reference/data-analysis/text-analysis/analysis-word-delimiter-tokenfilter.md b/docs/reference/data-analysis/text-analysis/analysis-word-delimiter-tokenfilter.md new file mode 100644 index 0000000000000..847d92b73d42a --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analysis-word-delimiter-tokenfilter.md @@ -0,0 +1,226 @@ +--- +navigation_title: "Word delimiter" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-word-delimiter-tokenfilter.html +--- + +# Word delimiter token filter [analysis-word-delimiter-tokenfilter] + + +::::{warning} +We recommend using the [`word_delimiter_graph`](/reference/data-analysis/text-analysis/analysis-word-delimiter-graph-tokenfilter.md) instead of the `word_delimiter` filter. + +The `word_delimiter` filter can produce invalid token graphs. See [Differences between `word_delimiter_graph` and `word_delimiter`](/reference/data-analysis/text-analysis/analysis-word-delimiter-graph-tokenfilter.md#analysis-word-delimiter-graph-differences). + +The `word_delimiter` filter also uses Lucene’s [WordDelimiterFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.md), which is marked as deprecated. + +:::: + + +Splits tokens at non-alphanumeric characters. The `word_delimiter` filter also performs optional token normalization based on a set of rules. By default, the filter uses the following rules: + +* Split tokens at non-alphanumeric characters. The filter uses these characters as delimiters. For example: `Super-Duper` → `Super`, `Duper` +* Remove leading or trailing delimiters from each token. For example: `XL---42+'Autocoder'` → `XL`, `42`, `Autocoder` +* Split tokens at letter case transitions. For example: `PowerShot` → `Power`, `Shot` +* Split tokens at letter-number transitions. For example: `XL500` → `XL`, `500` +* Remove the English possessive (`'s`) from the end of each token. For example: `Neil's` → `Neil` + +::::{tip} +The `word_delimiter` filter was designed to remove punctuation from complex identifiers, such as product IDs or part numbers. For these use cases, we recommend using the `word_delimiter` filter with the [`keyword`](/reference/data-analysis/text-analysis/analysis-keyword-tokenizer.md) tokenizer. + +Avoid using the `word_delimiter` filter to split hyphenated words, such as `wi-fi`. Because users often search for these words both with and without hyphens, we recommend using the [`synonym_graph`](/reference/data-analysis/text-analysis/analysis-synonym-graph-tokenfilter.md) filter instead. 
+ +:::: + + +## Example [analysis-word-delimiter-tokenfilter-analyze-ex] + +The following [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request uses the `word_delimiter` filter to split `Neil's-Super-Duper-XL500--42+AutoCoder` into normalized tokens using the filter’s default rules: + +```console +GET /_analyze +{ + "tokenizer": "keyword", + "filter": [ "word_delimiter" ], + "text": "Neil's-Super-Duper-XL500--42+AutoCoder" +} +``` + +The filter produces the following tokens: + +```txt +[ Neil, Super, Duper, XL, 500, 42, Auto, Coder ] +``` + + +## Add to an analyzer [_add_to_an_analyzer] + +The following [create index API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create) request uses the `word_delimiter` filter to configure a new [custom analyzer](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "keyword", + "filter": [ "word_delimiter" ] + } + } + } + } +} +``` + +::::{warning} +Avoid using the `word_delimiter` filter with tokenizers that remove punctuation, such as the [`standard`](/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md) tokenizer. This could prevent the `word_delimiter` filter from splitting tokens correctly. It can also interfere with the filter’s configurable parameters, such as `catenate_all` or `preserve_original`. We recommend using the [`keyword`](/reference/data-analysis/text-analysis/analysis-keyword-tokenizer.md) or [`whitespace`](/reference/data-analysis/text-analysis/analysis-whitespace-tokenizer.md) tokenizer instead. + +:::: + + + +## Configurable parameters [word-delimiter-tokenfilter-configure-parms] + +`catenate_all` +: (Optional, Boolean) If `true`, the filter produces catenated tokens for chains of alphanumeric characters separated by non-alphabetic delimiters. For example: `super-duper-xl-500` → [ `super`, **`superduperxl500`**, `duper`, `xl`, `500` ]. Defaults to `false`. + +::::{warning} +When used for search analysis, catenated tokens can cause problems for the [`match_phrase`](/reference/query-languages/query-dsl-match-query-phrase.md) query and other queries that rely on token position for matching. Avoid setting this parameter to `true` if you plan to use these queries. + +:::: + + + +`catenate_numbers` +: (Optional, Boolean) If `true`, the filter produces catenated tokens for chains of numeric characters separated by non-alphabetic delimiters. For example: `01-02-03` → [ `01`, **`010203`**, `02`, `03` ]. Defaults to `false`. + +::::{warning} +When used for search analysis, catenated tokens can cause problems for the [`match_phrase`](/reference/query-languages/query-dsl-match-query-phrase.md) query and other queries that rely on token position for matching. Avoid setting this parameter to `true` if you plan to use these queries. + +:::: + + + +`catenate_words` +: (Optional, Boolean) If `true`, the filter produces catenated tokens for chains of alphabetical characters separated by non-alphabetic delimiters. For example: `super-duper-xl` → [ `super`, **`superduperxl`**, `duper`, `xl` ]. Defaults to `false`. + +::::{warning} +When used for search analysis, catenated tokens can cause problems for the [`match_phrase`](/reference/query-languages/query-dsl-match-query-phrase.md) query and other queries that rely on token position for matching. 
Avoid setting this parameter to `true` if you plan to use these queries. + +:::: + + + +`generate_number_parts` +: (Optional, Boolean) If `true`, the filter includes tokens consisting of only numeric characters in the output. If `false`, the filter excludes these tokens from the output. Defaults to `true`. + +`generate_word_parts` +: (Optional, Boolean) If `true`, the filter includes tokens consisting of only alphabetical characters in the output. If `false`, the filter excludes these tokens from the output. Defaults to `true`. + +`preserve_original` +: (Optional, Boolean) If `true`, the filter includes the original version of any split tokens in the output. This original version includes non-alphanumeric delimiters. For example: `super-duper-xl-500` → [ **`super-duper-xl-500`**, `super`, `duper`, `xl`, `500` ]. Defaults to `false`. + +`protected_words` +: (Optional, array of strings) Array of tokens the filter won’t split. + +`protected_words_path` +: (Optional, string) Path to a file that contains a list of tokens the filter won’t split. + +This path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each token in the file must be separated by a line break. + + +`split_on_case_change` +: (Optional, Boolean) If `true`, the filter splits tokens at letter case transitions. For example: `camelCase` → [ `camel`, `Case` ]. Defaults to `true`. + +`split_on_numerics` +: (Optional, Boolean) If `true`, the filter splits tokens at letter-number transitions. For example: `j2se` → [ `j`, `2`, `se` ]. Defaults to `true`. + +`stem_english_possessive` +: (Optional, Boolean) If `true`, the filter removes the English possessive (`'s`) from the end of each token. For example: `O'Neil's` → [ `O`, `Neil` ]. Defaults to `true`. + +`type_table` +: (Optional, array of strings) Array of custom type mappings for characters. This allows you to map non-alphanumeric characters as numeric or alphanumeric to avoid splitting on those characters. + +For example, the following array maps the plus (`+`) and hyphen (`-`) characters as alphanumeric, which means they won’t be treated as delimiters: + +`[ "+ => ALPHA", "- => ALPHA" ]` + +Supported types include: + +* `ALPHA` (Alphabetical) +* `ALPHANUM` (Alphanumeric) +* `DIGIT` (Numeric) +* `LOWER` (Lowercase alphabetical) +* `SUBWORD_DELIM` (Non-alphanumeric delimiter) +* `UPPER` (Uppercase alphabetical) + + +`type_table_path` +: (Optional, string) Path to a file that contains custom type mappings for characters. This allows you to map non-alphanumeric characters as numeric or alphanumeric to avoid splitting on those characters. + +For example, the contents of this file may contain the following: + +```txt +# Map the $, %, '.', and ',' characters to DIGIT +# This might be useful for financial data. +$ => DIGIT +% => DIGIT +. => DIGIT +\\u002C => DIGIT + +# in some cases you might not want to split on ZWJ +# this also tests the case where we need a bigger byte[] +# see https://en.wikipedia.org/wiki/Zero-width_joiner +\\u200D => ALPHANUM +``` + +Supported types include: + +* `ALPHA` (Alphabetical) +* `ALPHANUM` (Alphanumeric) +* `DIGIT` (Numeric) +* `LOWER` (Lowercase alphabetical) +* `SUBWORD_DELIM` (Non-alphanumeric delimiter) +* `UPPER` (Uppercase alphabetical) + +This file path must be absolute or relative to the `config` location, and the file must be UTF-8 encoded. Each mapping in the file must be separated by a line break. 
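+
+To check how a combination of these parameters behaves before wiring the filter into an analyzer, you can define it inline in an [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) request. The following request is only a sketch: the sample text and parameter choices are illustrative. Mapping the hyphen to `ALPHA` and disabling `split_on_numerics` and `split_on_case_change` should keep a hyphenated part number such as `XL-500` intact while still splitting on the `+` delimiter.
+
+```console
+GET /_analyze
+{
+  "tokenizer": "keyword",
+  "filter": [
+    {
+      "type": "word_delimiter",
+      "type_table": [ "- => ALPHA" ],
+      "split_on_numerics": false,
+      "split_on_case_change": false
+    }
+  ],
+  "text": "XL-500+AutoCoder"
+}
+```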
+ + + +## Customize [analysis-word-delimiter-tokenfilter-customize] + +To customize the `word_delimiter` filter, duplicate it to create the basis for a new custom token filter. You can modify the filter using its configurable parameters. + +For example, the following request creates a `word_delimiter` filter that uses the following rules: + +* Split tokens at non-alphanumeric characters, *except* the hyphen (`-`) character. +* Remove leading or trailing delimiters from each token. +* Do *not* split tokens at letter case transitions. +* Do *not* split tokens at letter-number transitions. +* Remove the English possessive (`'s`) from the end of each token. + +```console +PUT /my-index-000001 +{ + "settings": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "keyword", + "filter": [ "my_custom_word_delimiter_filter" ] + } + }, + "filter": { + "my_custom_word_delimiter_filter": { + "type": "word_delimiter", + "type_table": [ "- => ALPHA" ], + "split_on_case_change": false, + "split_on_numerics": false, + "stem_english_possessive": true + } + } + } + } +} +``` diff --git a/docs/reference/data-analysis/text-analysis/analyzer-reference.md b/docs/reference/data-analysis/text-analysis/analyzer-reference.md new file mode 100644 index 0000000000000..c2aa76bc1c1c8 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/analyzer-reference.md @@ -0,0 +1,46 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-analyzers.html +--- + +# Analyzer reference [analysis-analyzers] + +Elasticsearch ships with a wide range of built-in analyzers, which can be used in any index without further configuration: + +[Standard Analyzer](/reference/data-analysis/text-analysis/analysis-standard-analyzer.md) +: The `standard` analyzer divides text into terms on word boundaries, as defined by the Unicode Text Segmentation algorithm. It removes most punctuation, lowercases terms, and supports removing stop words. + +[Simple Analyzer](/reference/data-analysis/text-analysis/analysis-simple-analyzer.md) +: The `simple` analyzer divides text into terms whenever it encounters a character which is not a letter. It lowercases all terms. + +[Whitespace Analyzer](/reference/data-analysis/text-analysis/analysis-whitespace-analyzer.md) +: The `whitespace` analyzer divides text into terms whenever it encounters any whitespace character. It does not lowercase terms. + +[Stop Analyzer](/reference/data-analysis/text-analysis/analysis-stop-analyzer.md) +: The `stop` analyzer is like the `simple` analyzer, but also supports removal of stop words. + +[Keyword Analyzer](/reference/data-analysis/text-analysis/analysis-keyword-analyzer.md) +: The `keyword` analyzer is a noop analyzer that accepts whatever text it is given and outputs the exact same text as a single term. + +[Pattern Analyzer](/reference/data-analysis/text-analysis/analysis-pattern-analyzer.md) +: The `pattern` analyzer uses a regular expression to split the text into terms. It supports lower-casing and stop words. + +[Language Analyzers](/reference/data-analysis/text-analysis/analysis-lang-analyzer.md) +: Elasticsearch provides many language-specific analyzers like `english` or `french`. + +[Fingerprint Analyzer](/reference/data-analysis/text-analysis/analysis-fingerprint-analyzer.md) +: The `fingerprint` analyzer is a specialist analyzer which creates a fingerprint which can be used for duplicate detection. 
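+
+To see the terms any of these analyzers produces for a given piece of text, you can run it through the [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze) without creating an index. A minimal example, using the `standard` analyzer on an arbitrary sample sentence:
+
+```console
+GET /_analyze
+{
+  "analyzer": "standard",
+  "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
+}
+```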
+
+
+## Custom analyzers [_custom_analyzers]
+
+If you do not find an analyzer suitable for your needs, you can create a [`custom`](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md) analyzer which combines the appropriate [character filters](/reference/data-analysis/text-analysis/character-filter-reference.md), [tokenizer](/reference/data-analysis/text-analysis/tokenizer-reference.md), and [token filters](/reference/data-analysis/text-analysis/token-filter-reference.md).
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/data-analysis/text-analysis/character-filter-reference.md b/docs/reference/data-analysis/text-analysis/character-filter-reference.md
new file mode 100644
index 0000000000000..e3a649844ed84
--- /dev/null
+++ b/docs/reference/data-analysis/text-analysis/character-filter-reference.md
@@ -0,0 +1,25 @@
+---
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-charfilters.html
+---
+
+# Character filter reference [analysis-charfilters]
+
+*Character filters* are used to preprocess the stream of characters before it is passed to the [tokenizer](/reference/data-analysis/text-analysis/tokenizer-reference.md).
+
+A character filter receives the original text as a stream of characters and can transform the stream by adding, removing, or changing characters. For instance, a character filter could be used to convert Hindu-Arabic numerals (٠‎١٢٣٤٥٦٧٨‎٩‎) into their Arabic-Latin equivalents (0123456789), or to strip HTML elements like `<b>` from the stream.
+
+Elasticsearch has a number of built-in character filters which can be used to build [custom analyzers](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md).
+
+[HTML Strip Character Filter](/reference/data-analysis/text-analysis/analysis-htmlstrip-charfilter.md)
+: The `html_strip` character filter strips out HTML elements like `<b>` and decodes HTML entities like `&amp;`.
+
+[Mapping Character Filter](/reference/data-analysis/text-analysis/analysis-mapping-charfilter.md)
+: The `mapping` character filter replaces any occurrences of the specified strings with the specified replacements.
+
+[Pattern Replace Character Filter](/reference/data-analysis/text-analysis/analysis-pattern-replace-charfilter.md)
+: The `pattern_replace` character filter replaces any characters matching a regular expression with the specified replacement.
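+
+As with analyzers and tokenizers, you can preview the effect of a character filter with the [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze). The following sketch, with sample text chosen purely for illustration, combines the `html_strip` character filter with the `keyword` tokenizer so that the stripped, decoded text is returned as a single token:
+
+```console
+GET /_analyze
+{
+  "tokenizer": "keyword",
+  "char_filter": [ "html_strip" ],
+  "text": "<p>I&apos;m so <b>happy</b>!</p>"
+}
+```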
+
+
+
+
diff --git a/docs/reference/data-analysis/text-analysis/index.md b/docs/reference/data-analysis/text-analysis/index.md
new file mode 100644
index 0000000000000..9bc7f5386666e
--- /dev/null
+++ b/docs/reference/data-analysis/text-analysis/index.md
@@ -0,0 +1,11 @@
+# Text analysis components
+
+% TO-DO: Add links to "Data analysis basics"%
+
+This section contains reference information for text analysis components, including:
+
+* [Analyzers](/reference/data-analysis/text-analysis/analyzer-reference.md)
+* [Tokenizers](/reference/data-analysis/text-analysis/tokenizer-reference.md)
+* [Token filters](/reference/data-analysis/text-analysis/token-filter-reference.md)
+* [Character filters](/reference/data-analysis/text-analysis/character-filter-reference.md)
+* [Normalizers](/reference/data-analysis/text-analysis/normalizers.md)
diff --git a/docs/reference/data-analysis/text-analysis/normalizers.md b/docs/reference/data-analysis/text-analysis/normalizers.md
new file mode 100644
index 0000000000000..b9edfd9a7f5a5
--- /dev/null
+++ b/docs/reference/data-analysis/text-analysis/normalizers.md
@@ -0,0 +1,50 @@
+---
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-normalizers.html
+---
+
+# Normalizers [analysis-normalizers]
+
+Normalizers are similar to analyzers except that they may only emit a single token. As a consequence, they do not have a tokenizer and only accept a subset of the available char filters and token filters. Only the filters that work on a per-character basis are allowed. For instance, a lowercasing filter would be allowed, but not a stemming filter, which needs to look at the keyword as a whole. The current list of filters that can be used in a normalizer definition is: `arabic_normalization`, `asciifolding`, `bengali_normalization`, `cjk_width`, `decimal_digit`, `elision`, `german_normalization`, `hindi_normalization`, `indic_normalization`, `lowercase`, `pattern_replace`, `persian_normalization`, `scandinavian_folding`, `serbian_normalization`, `sorani_normalization`, `trim`, `uppercase`.
+
+Elasticsearch ships with a `lowercase` built-in normalizer. For other forms of normalization, a custom configuration is required.
+
+
+## Custom normalizers [_custom_normalizers]
+
+Custom normalizers take a list of [character filters](/reference/data-analysis/text-analysis/character-filter-reference.md) and a list of [token filters](/reference/data-analysis/text-analysis/token-filter-reference.md).
+ +```console +PUT index +{ + "settings": { + "analysis": { + "char_filter": { + "quote": { + "type": "mapping", + "mappings": [ + "« => \"", + "» => \"" + ] + } + }, + "normalizer": { + "my_normalizer": { + "type": "custom", + "char_filter": ["quote"], + "filter": ["lowercase", "asciifolding"] + } + } + } + }, + "mappings": { + "properties": { + "foo": { + "type": "keyword", + "normalizer": "my_normalizer" + } + } + } +} +``` + diff --git a/docs/reference/data-analysis/text-analysis/token-filter-reference.md b/docs/reference/data-analysis/text-analysis/token-filter-reference.md new file mode 100644 index 0000000000000..6f3f094362c93 --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/token-filter-reference.md @@ -0,0 +1,59 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-tokenfilters.html +--- + +# Token filter reference [analysis-tokenfilters] + +Token filters accept a stream of tokens from a [tokenizer](/reference/data-analysis/text-analysis/tokenizer-reference.md) and can modify tokens (eg lowercasing), delete tokens (eg remove stopwords) or add tokens (eg synonyms). + +{{es}} has a number of built-in token filters you can use to build [custom analyzers](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/reference/data-analysis/text-analysis/tokenizer-reference.md b/docs/reference/data-analysis/text-analysis/tokenizer-reference.md new file mode 100644 index 0000000000000..4ac3aaa7ea78b --- /dev/null +++ b/docs/reference/data-analysis/text-analysis/tokenizer-reference.md @@ -0,0 +1,100 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-tokenizers.html +--- + +# Tokenizer reference [analysis-tokenizers] + +::::{admonition} Difference between {{es}} tokenization and neural tokenization +:class: note + +{{es}}'s tokenization process produces linguistic tokens, optimized for search and retrieval. This differs from neural tokenization in the context of machine learning and natural language processing. Neural tokenizers translate strings into smaller, subword tokens, which are encoded into vectors for consumptions by neural networks. {{es}} does not have built-in neural tokenizers. + +:::: + + +A *tokenizer* receives a stream of characters, breaks it up into individual *tokens* (usually individual words), and outputs a stream of *tokens*. For instance, a [`whitespace`](/reference/data-analysis/text-analysis/analysis-whitespace-tokenizer.md) tokenizer breaks text into tokens whenever it sees any whitespace. It would convert the text `"Quick brown fox!"` into the terms `[Quick, brown, fox!]`. + +The tokenizer is also responsible for recording the following: + +* Order or *position* of each term (used for phrase and word proximity queries) +* Start and end *character offsets* of the original word which the term represents (used for highlighting search snippets). +* *Token type*, a classification of each term produced, such as ``, ``, or ``. Simpler analyzers only produce the `word` token type. + +Elasticsearch has a number of built in tokenizers which can be used to build [custom analyzers](docs-content://manage-data/data-store/text-analysis/create-custom-analyzer.md). 
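+
+You can inspect the tokens, positions, offsets, and types a tokenizer records by calling the [analyze API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-analyze). For example, the following request runs the `whitespace` tokenizer mentioned above and returns the terms `[Quick, brown, fox!]`, along with the position, character offsets, and token type recorded for each one:
+
+```console
+POST _analyze
+{
+  "tokenizer": "whitespace",
+  "text": "Quick brown fox!"
+}
+```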
+ + +## Word Oriented Tokenizers [_word_oriented_tokenizers] + +The following tokenizers are usually used for tokenizing full text into individual words: + +[Standard Tokenizer](/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md) +: The `standard` tokenizer divides text into terms on word boundaries, as defined by the Unicode Text Segmentation algorithm. It removes most punctuation symbols. It is the best choice for most languages. + +[Letter Tokenizer](/reference/data-analysis/text-analysis/analysis-letter-tokenizer.md) +: The `letter` tokenizer divides text into terms whenever it encounters a character which is not a letter. + +[Lowercase Tokenizer](/reference/data-analysis/text-analysis/analysis-lowercase-tokenizer.md) +: The `lowercase` tokenizer, like the `letter` tokenizer, divides text into terms whenever it encounters a character which is not a letter, but it also lowercases all terms. + +[Whitespace Tokenizer](/reference/data-analysis/text-analysis/analysis-whitespace-tokenizer.md) +: The `whitespace` tokenizer divides text into terms whenever it encounters any whitespace character. + +[UAX URL Email Tokenizer](/reference/data-analysis/text-analysis/analysis-uaxurlemail-tokenizer.md) +: The `uax_url_email` tokenizer is like the `standard` tokenizer except that it recognises URLs and email addresses as single tokens. + +[Classic Tokenizer](/reference/data-analysis/text-analysis/analysis-classic-tokenizer.md) +: The `classic` tokenizer is a grammar based tokenizer for the English Language. + +[Thai Tokenizer](/reference/data-analysis/text-analysis/analysis-thai-tokenizer.md) +: The `thai` tokenizer segments Thai text into words. + + +## Partial Word Tokenizers [_partial_word_tokenizers] + +These tokenizers break up text or words into small fragments, for partial word matching: + +[N-Gram Tokenizer](/reference/data-analysis/text-analysis/analysis-ngram-tokenizer.md) +: The `ngram` tokenizer can break up text into words when it encounters any of a list of specified characters (e.g. whitespace or punctuation), then it returns n-grams of each word: a sliding window of continuous letters, e.g. `quick` → `[qu, ui, ic, ck]`. + +[Edge N-Gram Tokenizer](/reference/data-analysis/text-analysis/analysis-edgengram-tokenizer.md) +: The `edge_ngram` tokenizer can break up text into words when it encounters any of a list of specified characters (e.g. whitespace or punctuation), then it returns n-grams of each word which are anchored to the start of the word, e.g. `quick` → `[q, qu, qui, quic, quick]`. + + +## Structured Text Tokenizers [_structured_text_tokenizers] + +The following tokenizers are usually used with structured text like identifiers, email addresses, zip codes, and paths, rather than with full text: + +[Keyword Tokenizer](/reference/data-analysis/text-analysis/analysis-keyword-tokenizer.md) +: The `keyword` tokenizer is a noop tokenizer that accepts whatever text it is given and outputs the exact same text as a single term. It can be combined with token filters like [`lowercase`](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md) to normalise the analysed terms. + +[Pattern Tokenizer](/reference/data-analysis/text-analysis/analysis-pattern-tokenizer.md) +: The `pattern` tokenizer uses a regular expression to either split text into terms whenever it matches a word separator, or to capture matching text as terms. 
+ +[Simple Pattern Tokenizer](/reference/data-analysis/text-analysis/analysis-simplepattern-tokenizer.md) +: The `simple_pattern` tokenizer uses a regular expression to capture matching text as terms. It uses a restricted subset of regular expression features and is generally faster than the `pattern` tokenizer. + +[Char Group Tokenizer](/reference/data-analysis/text-analysis/analysis-chargroup-tokenizer.md) +: The `char_group` tokenizer is configurable through sets of characters to split on, which is usually less expensive than running regular expressions. + +[Simple Pattern Split Tokenizer](/reference/data-analysis/text-analysis/analysis-simplepatternsplit-tokenizer.md) +: The `simple_pattern_split` tokenizer uses the same restricted regular expression subset as the `simple_pattern` tokenizer, but splits the input at matches rather than returning the matches as terms. + +[Path Tokenizer](/reference/data-analysis/text-analysis/analysis-pathhierarchy-tokenizer.md) +: The `path_hierarchy` tokenizer takes a hierarchical value like a filesystem path, splits on the path separator, and emits a term for each component in the tree, e.g. `/foo/bar/baz` → `[/foo, /foo/bar, /foo/bar/baz ]`. + + + + + + + + + + + + + + + + diff --git a/docs/reference/data-management.asciidoc b/docs/reference/data-management.asciidoc deleted file mode 100644 index 4da62e5b2c7c0..0000000000000 --- a/docs/reference/data-management.asciidoc +++ /dev/null @@ -1,61 +0,0 @@ -[role="xpack"] -[[data-management]] -= Data management - -[partintro] --- -The data you store in {es} generally falls into one of two categories: - -* *Content*: a collection of items you want to search, such as a catalog of products -* *Time series data*: a stream of continuously-generated timestamped data, such as log entries - -*Content* might be frequently updated, -but the value of the content remains relatively constant over time. -You want to be able to retrieve items quickly regardless of how old they are. - -*Time series data* keeps accumulating over time, so you need strategies for -balancing the value of the data against the cost of storing it. -As it ages, it tends to become less important and less-frequently accessed, -so you can move it to less expensive, less performant hardware. -For your oldest data, what matters is that you have access to the data. -It's ok if queries take longer to complete. - -To help you manage your data, {es} offers you the following options: - -* <> -* <> -* {curator-ref-current}/about.html[Elastic Curator] - -**{ilm-init}** can be used to manage both indices and data streams. It allows you to do the following: - -* Define the retention period of your data. The retention period is the minimum time your data will be stored in {es}. -Data older than this period can be deleted by {es}. -* Define <> of data nodes with different performance characteristics. -* Automatically transition indices through the data tiers according to your performance needs and retention policies. -* Leverage <> stored in a remote repository to provide resiliency -for your older indices while reducing operating costs and maintaining search performance. -* Perform <> of data stored on less-performant hardware. - -**Data stream lifecycle** is less feature rich but is focused on simplicity. It allows you to do the following: - -* Define the retention period of your data. The retention period is the minimum time your data will be stored in {es}. -Data older than this period can be deleted by {es} at a later time. 
-* Improve the performance of your data stream by performing background operations that will optimise the way your data stream is stored. - -**Elastic Curator** is a tool that allows you to manage your indices and snapshots using user-defined filters and predefined actions. If ILM provides the functionality to manage your index lifecycle, and you have at least a Basic license, consider using ILM in place of Curator. Many stack components make use of ILM by default. {curator-ref-current}/ilm.html[Learn more]. - -NOTE: <> is a deprecated {es} feature that allows you to manage the amount of data that is stored in your cluster, similar to the downsampling functionality of {ilm-init} and data stream lifecycle. This feature should not be used for new deployments. - -[TIP] -==== -{ilm-init} is not available on {es-serverless}. - -In an {ecloud} or self-managed environment, ILM lets you automatically transition indices through data tiers according to your performance needs and retention requirements. This allows you to balance hardware costs with performance. {es-serverless} eliminates this complexity by optimizing your cluster performance for you. - -Data stream lifecycle is an optimized lifecycle tool that lets you focus on the most common lifecycle management needs, without unnecessary hardware-centric concepts like data tiers. -==== --- - -include::ilm/index.asciidoc[] - -include::datatiers.asciidoc[] diff --git a/docs/reference/data-management/migrate-index-allocation-filters.asciidoc b/docs/reference/data-management/migrate-index-allocation-filters.asciidoc deleted file mode 100644 index ee7d5640d53df..0000000000000 --- a/docs/reference/data-management/migrate-index-allocation-filters.asciidoc +++ /dev/null @@ -1,232 +0,0 @@ -[role="xpack"] -[[migrate-index-allocation-filters]] -== Migrate index allocation filters to node roles - -If you currently use <> and -<> to -move indices through <> in a -https://www.elastic.co/blog/implementing-hot-warm-cold-in-elasticsearch-with-index-lifecycle-management[hot-warm-cold architecture], -we recommend that you switch to using the built-in node roles -and automatic <>. -Using node roles enables {ilm-init} to automatically -move indices between data tiers. - -NOTE: While we recommend relying on automatic data tier allocation to manage -your data in a hot-warm-cold architecture, -you can still use attribute-based allocation filters to -control shard allocation for other purposes. - -{ess} and {ece} can perform the migration automatically. For self-managed -deployments, you need to manually update your configuration, ILM policies, and -indices to switch to node roles. - -[discrete] -[[cloud-migrate-to-node-roles]] -=== Automatically migrate to node roles on {ess} or {ece} - -If you are using node attributes from the default deployment template in {ess} or {ece}, you will be -prompted to switch to node roles when you: - -* Upgrade to {es} 7.10 or higher -* Deploy a warm, cold, or frozen data tier -* {cloud}/ec-autoscaling.html[Enable autoscaling] - -These actions automatically update your cluster configuration -and {ilm-init} policies to use node roles. Additionally, upgrading to -version 7.14 or higher automatically update {ilm-init} policies -whenever any configuration change is applied to your deployment. - - -If you use custom index templates, check them after the automatic migration -completes and remove any <>. - -NOTE: You do not need to take any further action after the automatic migration. 
-The following manual steps are only necessary if you do not allow the automatic -migration or have a self-managed deployment. - -[discrete] -[[on-prem-migrate-to-node-roles]] -=== Migrate to node roles on self-managed deployments - -To switch to using node roles: - -. <> to the appropriate data tier. -. <> from your {ilm} policy. -. <> -on new indices. -. Update existing indices to <>. - - -[discrete] -[[assign-data-tier]] -==== Assign data nodes to a data tier - -Configure the appropriate roles for each data node to assign it to one or more -data tiers: `data_hot`, `data_content`, `data_warm`, `data_cold`, or `data_frozen`. -A node can also have other <>. By default, new nodes are -configured with all roles. - -When you add a data tier to an {ess} deployment, -one or more nodes are automatically configured with the corresponding role. -To explicitly change the role of a node in an {ess} deployment, use the -{cloud}/ec-api-deployment-crud.html#ec_update_a_deployment[Update deployment API]. -Replace the node's `node_type` configuration with the appropriate `node_roles`. -For example, the following configuration adds the node to the hot and content -tiers, and enables it to act as an ingest node, remote, and transform node. - -[source,yaml] ----- -"node_roles": [ - "data_hot", - "data_content", - "ingest", - "remote_cluster_client", - "transform" -], ----- - -If you are directly managing your own cluster, -configure the appropriate roles for each node in `elasticsearch.yml`. -For example, the following setting configures a node to be a data-only -node in the hot and content tiers. - -[source,yaml] ----- -node.roles [ data_hot, data_content ] ----- - -[discrete] -[[remove-custom-allocation-settings]] -==== Remove custom allocation settings from existing {ilm-init} policies - -Update the allocate action for each lifecycle phase to remove the attribute-based -allocation settings. {ilm-init} will inject a -<> action into each phase -to automatically transition the indices through the data tiers. - -If the allocate action does not set the number of replicas, -remove the allocate action entirely. (An empty allocate action is invalid.) - -IMPORTANT: The policy must specify the corresponding phase for each data tier in -your architecture. Each phase must be present so {ilm-init} can inject the -migrate action to move indices through the data tiers. -If you don't need to perform any other actions, the phase can be empty. -For example, if you enable the warm and cold data tiers for a deployment, -your policy must include the hot, warm, and cold phases. - -[discrete] -[[stop-setting-custom-hot-attribute]] -==== Stop setting the custom hot attribute on new indices - -When you create a data stream, its first backing index -is now automatically assigned to `data_hot` nodes. -Similarly, when you directly create an index, it -is automatically assigned to `data_content` nodes. - -On {ess} deployments, remove the `cloud-hot-warm-allocation-0` index template -that set the hot shard allocation attribute on all indices. - -[source,console] ----- -DELETE _template/.cloud-hot-warm-allocation-0 ----- -// TEST[skip:no cloud template] - -If you're using a custom index template, update it to remove the <> you used to assign new indices to the hot tier. - -To completely avoid the issues that raise when mixing the tier preference and -custom attribute routing setting we also recommend updating all the legacy, -composable, and component templates to remove the <> -from the settings they configure. 
- -[discrete] -[[set-tier-preference]] -==== Set a tier preference for existing indices - -{ilm-init} automatically transitions managed indices through the available -data tiers by automatically injecting a <> -into each phase. - -To enable {ilm-init} to move an _existing_ managed index -through the data tiers, update the index settings to: - -. Remove the custom allocation filter by setting it to `null`. -. Set the <>. - -For example, if your old template set the `data` attribute to `hot` -to allocate shards to the hot tier, set the `data` attribute to `null` -and set the `_tier_preference` to `data_hot`. - -//// -[source,console] ----- -PUT /my-index - -PUT /my-index/_settings -{ - "index.routing.allocation.require.data": "hot" -} ----- -//// - -[source,console] ----- -PUT my-index/_settings -{ - "index.routing.allocation.require.data": null, - "index.routing.allocation.include._tier_preference": "data_hot" -} ----- -// TEST[continued] - -For indices that have already transitioned out of the hot phase, -the tier preference should include the appropriate fallback tiers -to ensure index shards can be allocated if the preferred tier -is unavailable. -For example, specify the hot tier as the fallback for indices -already in the warm phase. - -[source,console] ----- -PUT my-index/_settings -{ - "index.routing.allocation.require.data": null, - "index.routing.allocation.include._tier_preference": "data_warm,data_hot" -} ----- -// TEST[continued] - -If an index is already in the cold phase, include the cold, warm, and hot tiers. - -For indices that have both the `_tier_preference` and `require.data` configured -but the `_tier_preference` is outdated (ie. the node attribute configuration -is "colder" than the configured `_tier_preference`), the migration needs to -remove the `require.data` attribute and update the `_tier_preference` to reflect -the correct tiering. - -eg. For an index with the following routing configuration: -[source,JSON] ----- -{ - "index.routing.allocation.require.data": "warm", - "index.routing.allocation.include._tier_preference": "data_hot" -} ----- - -The routing configuration should be fixed like so: -[source,console] ----- -PUT my-index/_settings -{ - "index.routing.allocation.require.data": null, - "index.routing.allocation.include._tier_preference": "data_warm,data_hot" -} ----- -// TEST[continued] - -This situation can occur in a system that defaults to data tiers when, e.g., -an ILM policy that uses node attributes is restored and transitions the managed -indices from the hot phase into the warm phase. In this case the node attribute -configuration indicates the correct tier where the index should be allocated. diff --git a/docs/reference/data-rollup-transform.asciidoc b/docs/reference/data-rollup-transform.asciidoc deleted file mode 100644 index 3116d4117f70e..0000000000000 --- a/docs/reference/data-rollup-transform.asciidoc +++ /dev/null @@ -1,23 +0,0 @@ -[[data-rollup-transform]] -= Roll up or transform your data - -[partintro] --- - -{es} offers the following methods for manipulating your data: - -* <> -+ -deprecated::[8.11.0,"Rollups will be removed in a future version. 
Use <> instead."] -+ -include::rollup/index.asciidoc[tag=rollup-intro] - -* <> -+ -include::transform/transforms.asciidoc[tag=transform-intro] - --- - -include::rollup/index.asciidoc[] - -include::transform/index.asciidoc[] diff --git a/docs/reference/data-store-architecture.asciidoc b/docs/reference/data-store-architecture.asciidoc deleted file mode 100644 index a0d504eb117c8..0000000000000 --- a/docs/reference/data-store-architecture.asciidoc +++ /dev/null @@ -1,24 +0,0 @@ -= Data store architecture - -[partintro] --- - -{es} is a distributed document store. Instead of storing information as rows of columnar data, {es} stores complex data structures that have been serialized as JSON documents. When you have multiple {es} nodes in a cluster, stored documents are distributed across the cluster and can be accessed immediately -from any node. - -The topics in this section provides information about the architecture of {es} and how it stores and retrieves data: - -* <>: Learn about the basic building blocks of an {es} cluster, including nodes, shards, primaries, and replicas. -* <>: Learn about the different roles that nodes can have in an {es} cluster. -* <>: Learn how {es} replicates read and write operations across shards and shard copies. -* <>: Learn how {es} allocates and balances shards across nodes. -** <>: Learn how to use custom node attributes to distribute shards across different racks or availability zones. -* <>: Learn how {es} caches search requests to improve performance. --- - -include::nodes-shards.asciidoc[] -include::node-roles.asciidoc[] -include::docs/data-replication.asciidoc[leveloffset=-1] -include::modules/shard-ops.asciidoc[] -include::modules/cluster/allocation_awareness.asciidoc[leveloffset=+1] -include::shard-request-cache.asciidoc[leveloffset=-1] diff --git a/docs/reference/data-streams/change-mappings-and-settings.asciidoc b/docs/reference/data-streams/change-mappings-and-settings.asciidoc deleted file mode 100644 index 1290f289e5bbd..0000000000000 --- a/docs/reference/data-streams/change-mappings-and-settings.asciidoc +++ /dev/null @@ -1,708 +0,0 @@ -[[modify-data-streams]] -== Modify a data stream - -[discrete] -[[data-streams-change-mappings-and-settings]] -=== Change mappings and settings for a data stream - -Each <> has a <>. Mappings and index settings from this template are applied to new -backing indices created for the stream. This includes the stream's first -backing index, which is auto-generated when the stream is created. - -Before creating a data stream, we recommend you carefully consider which -mappings and settings to include in this template. - -If you later need to change the mappings or settings for a data stream, you have -a few options: - -* <> -* <> -* <> -* <> - -TIP: If your changes include modifications to existing field mappings or -<>, a reindex is often required to -apply the changes to a data stream's backing indices. If you are already -performing a reindex, you can use the same process to add new field -mappings and change <>. See -<>. 
- -//// -[source,console] ----- -PUT /_ilm/policy/my-data-stream-policy -{ - "policy": { - "phases": { - "hot": { - "actions": { - "rollover": { - "max_primary_shard_size": "25GB" - } - } - }, - "delete": { - "min_age": "30d", - "actions": { - "delete": {} - } - } - } - } -} - -PUT /_index_template/my-data-stream-template -{ - "index_patterns": [ "my-data-stream*" ], - "data_stream": { } -} - -PUT /_index_template/new-data-stream-template -{ - "index_patterns": [ "new-data-stream*" ], - "data_stream": { } -} - -PUT /_data_stream/my-data-stream - -POST /my-data-stream/_rollover/ - -PUT /_data_stream/new-data-stream - -DELETE /_data_stream/*/_lifecycle ----- -// TESTSETUP - -[source,console] ----- -DELETE /_data_stream/my-data-stream*,new-data-stream* - -DELETE /_index_template/my-data-stream-template,new-data-stream-template - -DELETE /_ilm/policy/my-data-stream-policy ----- -// TEARDOWN -//// - -[discrete] -[[add-new-field-mapping-to-a-data-stream]] -==== Add a new field mapping to a data stream - -To add a mapping for a new field to a data stream, following these steps: - -. Update the index template used by the data stream. This ensures the new -field mapping is added to future backing indices created for the stream. -+ --- -For example, `my-data-stream-template` is an existing index template used by -`my-data-stream`. - -The following <> request adds a mapping -for a new field, `message`, to the template. - -[source,console] ----- -PUT /_index_template/my-data-stream-template -{ - "index_patterns": [ "my-data-stream*" ], - "data_stream": { }, - "priority": 500, - "template": { - "mappings": { - "properties": { - "message": { <1> - "type": "text" - } - } - } - } -} ----- -<1> Adds a mapping for the new `message` field. --- - -. Use the <> to add the new field mapping -to the data stream. By default, this adds the mapping to the stream's existing -backing indices, including the write index. -+ --- -The following update mapping API request adds the new `message` field mapping to -`my-data-stream`. - -[source,console] ----- -PUT /my-data-stream/_mapping -{ - "properties": { - "message": { - "type": "text" - } - } -} ----- --- -+ -To add the mapping only to the stream's write index, set the update mapping API's -`write_index_only` query parameter to `true`. -+ --- -The following update mapping request adds the new `message` field mapping only to -`my-data-stream`'s write index. The new field mapping is not added to -the stream's other backing indices. - -[source,console] ----- -PUT /my-data-stream/_mapping?write_index_only=true -{ - "properties": { - "message": { - "type": "text" - } - } -} ----- --- - -[discrete] -[[change-existing-field-mapping-in-a-data-stream]] -==== Change an existing field mapping in a data stream - -The documentation for each <> indicates -whether you can update it for an existing field using the -<>. To update these parameters for an -existing field, follow these steps: - -. Update the index template used by the data stream. This ensures the updated -field mapping is added to future backing indices created for the stream. -+ --- -For example, `my-data-stream-template` is an existing index template used by -`my-data-stream`. - -The following <> request changes the -argument for the `host.ip` field's <> -mapping parameter to `true`. 
- -[source,console] ----- -PUT /_index_template/my-data-stream-template -{ - "index_patterns": [ "my-data-stream*" ], - "data_stream": { }, - "priority": 500, - "template": { - "mappings": { - "properties": { - "host": { - "properties": { - "ip": { - "type": "ip", - "ignore_malformed": true <1> - } - } - } - } - } - } -} ----- -<1> Changes the `host.ip` field's `ignore_malformed` value to `true`. --- - -. Use the <> to apply the mapping changes -to the data stream. By default, this applies the changes to the stream's -existing backing indices, including the write index. -+ --- -The following <> request targets -`my-data-stream`. The request changes the argument for the `host.ip` -field's `ignore_malformed` mapping parameter to `true`. - -[source,console] ----- -PUT /my-data-stream/_mapping -{ - "properties": { - "host": { - "properties": { - "ip": { - "type": "ip", - "ignore_malformed": true - } - } - } - } -} ----- --- -+ -To apply the mapping changes only to the stream's write index, set the put -mapping API's `write_index_only` query parameter to `true`. -+ --- -The following update mapping request changes the `host.ip` field's mapping only for -`my-data-stream`'s write index. The change is not applied to the -stream's other backing indices. - -[source,console] ----- -PUT /my-data-stream/_mapping?write_index_only=true -{ - "properties": { - "host": { - "properties": { - "ip": { - "type": "ip", - "ignore_malformed": true - } - } - } - } -} ----- --- - -Except for supported mapping parameters, we don't recommend you change the -mapping or field data type of existing fields, even in a data stream's matching -index template or its backing indices. Changing the mapping of an existing -field could invalidate any data that’s already indexed. - -If you need to change the mapping of an existing field, create a new -data stream and reindex your data into it. See -<>. - -[discrete] -[[change-dynamic-index-setting-for-a-data-stream]] -==== Change a dynamic index setting for a data stream - -To change a <> for a data stream, -follow these steps: - -. Update the index template used by the data stream. This ensures the setting is -applied to future backing indices created for the stream. -+ --- -For example, `my-data-stream-template` is an existing index template used by -`my-data-stream`. - -The following <> request changes the -template's `index.refresh_interval` index setting to `30s` (30 seconds). - -[source,console] ----- -PUT /_index_template/my-data-stream-template -{ - "index_patterns": [ "my-data-stream*" ], - "data_stream": { }, - "priority": 500, - "template": { - "settings": { - "index.refresh_interval": "30s" <1> - } - } -} ----- -<1> Changes the `index.refresh_interval` setting to `30s` (30 seconds). --- - -. Use the <> to update the -index setting for the data stream. By default, this applies the setting to -the stream's existing backing indices, including the write index. -+ --- -The following update index settings API request updates the -`index.refresh_interval` setting for `my-data-stream`. - -[source,console] ----- -PUT /my-data-stream/_settings -{ - "index": { - "refresh_interval": "30s" - } -} ----- --- - -IMPORTANT: To change the `index.lifecycle.name` setting, first use the -<> to remove the existing {ilm-init} -policy. See <>. - -[discrete] -[[change-static-index-setting-for-a-data-stream]] -==== Change a static index setting for a data stream - -<> can only be set when a backing -index is created. You cannot update static index settings using the -<>. 
-
-To apply a new static setting to future backing indices, update the index
-template used by the data stream. The setting is automatically applied to any
-backing index created after the update.
-
-For example, `my-data-stream-template` is an existing index template used by
-`my-data-stream`.
-
-The following <> request
-adds new `sort.field` and `sort.order` index settings to the template.
-
-[source,console]
------
-PUT /_index_template/my-data-stream-template
-{
-  "index_patterns": [ "my-data-stream*" ],
-  "data_stream": { },
-  "priority": 500,
-  "template": {
-    "settings": {
-      "sort.field": [ "@timestamp"], <1>
-      "sort.order": [ "desc"] <2>
-    }
-  }
-}
------
-<1> Adds the `sort.field` index setting.
-<2> Adds the `sort.order` index setting.
-
-If needed, you can <> to immediately apply the settings to the data stream’s write index. This
-affects any new data added to the stream after the rollover. However, it does
-not affect the data stream's existing backing indices or existing data.
-
-To apply static setting changes to existing backing indices, you must create a
-new data stream and reindex your data into it. See
-<>.
-
-[discrete]
-[[data-streams-use-reindex-to-change-mappings-settings]]
-==== Use reindex to change mappings or settings
-
-You can use a reindex to change the mappings or settings of a data stream. This
-is often required to change the data type of an existing field or update static
-index settings for backing indices.
-
-To reindex a data stream, first create or update an index template so that it
-contains the wanted mapping or setting changes. You can then reindex the
-existing data stream into a new stream matching the template. This applies the
-mapping and setting changes in the template to each document and backing index
-added to the new data stream. These changes also affect any future backing
-index created by the new stream.
-
-Follow these steps:
-
-. Choose a name or index pattern for a new data stream. This new data
-stream will contain data from your existing stream.
-+
---
-You can use the resolve index API to check if the name or pattern matches any
-existing indices, aliases, or data streams. If so, you should consider using
-another name or pattern.
-
-The following resolve index API request checks for any existing indices,
-aliases, or data streams that start with `new-data-stream`. If not, the
-`new-data-stream*` index pattern can be used to create a new data stream.
-
-[source,console]
------
-GET /_resolve/index/new-data-stream*
------
-
-The API returns the following response, indicating no existing targets match
-this pattern.
-
-[source,console-result]
------
-{
-  "indices": [ ],
-  "aliases": [ ],
-  "data_streams": [ ]
-}
------
-// TESTRESPONSE[s/"data_streams": \[ \]/"data_streams": $body.data_streams/]
---
-
-. Create or update an index template. This template should contain the
-mappings and settings you'd like to apply to the new data stream's backing
-indices.
-+
-This index template must meet the
-<>. It
-should also contain your previously chosen name or index pattern in the
-`index_patterns` property.
-+
-TIP: If you are only adding or changing a few things, we recommend you create a
-new template by copying an existing one and modifying it as needed.
-+
---
-For example, `my-data-stream-template` is an existing index template used by
-`my-data-stream`.
-
-The following <> request
-creates a new index template, `new-data-stream-template`.
-`new-data-stream-template` uses `my-data-stream-template` as its basis, with the -following changes: - -* The index pattern in `index_patterns` matches any index or data stream - starting with `new-data-stream`. -* The `@timestamp` field mapping uses the `date_nanos` field data type rather - than the `date` data type. -* The template includes `sort.field` and `sort.order` index settings, which were - not in the original `my-data-stream-template` template. - -[source,console] ----- -PUT /_index_template/new-data-stream-template -{ - "index_patterns": [ "new-data-stream*" ], - "data_stream": { }, - "priority": 500, - "template": { - "mappings": { - "properties": { - "@timestamp": { - "type": "date_nanos" <1> - } - } - }, - "settings": { - "sort.field": [ "@timestamp"], <2> - "sort.order": [ "desc"] <3> - } - } -} ----- -<1> Changes the `@timestamp` field mapping to the `date_nanos` field data type. -<2> Adds the `sort.field` index setting. -<3> Adds the `sort.order` index setting. --- - -. Use the <> to manually -create the new data stream. The name of the data stream must match the index -pattern defined in the new template's `index_patterns` property. -+ -We do not recommend <>. Later, you will reindex older data from an -existing data stream into this new stream. This could result in one or more -backing indices that contains a mix of new and old data. -+ -[IMPORTANT] -====== -[[data-stream-mix-new-old-data]] -*Mixing new and old data in a data stream* - -While mixing new and old data is safe, it could interfere with data retention. -If you delete older indices, you could accidentally delete a backing index that -contains both new and old data. To prevent premature data loss, you would need -to retain such a backing index until you are ready to delete its newest data. -====== -+ --- -The following create data stream API request targets `new-data-stream`, which -matches the index pattern for `new-data-stream-template`. -Because no existing index or data stream uses this name, this request creates -the `new-data-stream` data stream. - -[source,console] ----- -PUT /_data_stream/new-data-stream ----- -// TEST[s/new-data-stream/new-data-stream-two/] --- - -. If you do not want to mix new and old data in your new data stream, pause the -indexing of new documents. While mixing old and new data is safe, it could -interfere with data retention. See <>. - -. If you use {ilm-init} to <>, reduce the {ilm-init} poll interval. This ensures the current write -index doesn’t grow too large while waiting for the rollover check. By default, -{ilm-init} checks rollover conditions every 10 minutes. -+ --- -The following <> request -lowers the `indices.lifecycle.poll_interval` setting to `1m` (one minute). - -[source,console] ----- -PUT /_cluster/settings -{ - "persistent": { - "indices.lifecycle.poll_interval": "1m" - } -} ----- --- - -. Reindex your data to the new data stream using an `op_type` of `create`. -+ -If you want to partition the data in the order in which it was originally -indexed, you can run separate reindex requests. These reindex requests can use -individual backing indices as the source. You can use the -<> to retrieve a list of backing -indices. -+ --- -For example, you plan to reindex data from `my-data-stream` into -`new-data-stream`. However, you want to submit a separate reindex request for -each backing index in `my-data-stream`, starting with the oldest backing index. -This preserves the order in which the data was originally indexed. 
- -The following get data stream API request retrieves information about -`my-data-stream`, including a list of its backing indices. - -[source,console] ----- -GET /_data_stream/my-data-stream ----- - -The response's `indices` property contains an array of the stream's current -backing indices. The first item in the array contains information about the -stream's oldest backing index. - -[source,console-result] ----- -{ - "data_streams": [ - { - "name": "my-data-stream", - "timestamp_field": { - "name": "@timestamp" - }, - "indices": [ - { - "index_name": ".ds-my-data-stream-2099.03.07-000001", <1> - "index_uuid": "Gpdiyq8sRuK9WuthvAdFbw", - "prefer_ilm": true, - "managed_by": "Unmanaged" - }, - { - "index_name": ".ds-my-data-stream-2099.03.08-000002", - "index_uuid": "_eEfRrFHS9OyhqWntkgHAQ", - "prefer_ilm": true, - "managed_by": "Unmanaged" - } - ], - "generation": 2, - "status": "GREEN", - "next_generation_managed_by": "Unmanaged", - "prefer_ilm": true, - "template": "my-data-stream-template", - "hidden": false, - "system": false, - "allow_custom_routing": false, - "replicated": false, - "rollover_on_write": false - } - ] -} ----- -// TESTRESPONSE[s/"index_uuid": "Gpdiyq8sRuK9WuthvAdFbw"/"index_uuid": $body.data_streams.0.indices.0.index_uuid/] -// TESTRESPONSE[s/"index_uuid": "_eEfRrFHS9OyhqWntkgHAQ"/"index_uuid": $body.data_streams.0.indices.1.index_uuid/] -// TESTRESPONSE[s/"index_name": ".ds-my-data-stream-2099.03.07-000001"/"index_name": $body.data_streams.0.indices.0.index_name/] -// TESTRESPONSE[s/"index_name": ".ds-my-data-stream-2099.03.08-000002"/"index_name": $body.data_streams.0.indices.1.index_name/] -// TESTRESPONSE[s/"status": "GREEN"/"status": "YELLOW","failure_store":{"enabled": false, "indices": [], "rollover_on_write": true}/] - -<1> First item in the `indices` array for `my-data-stream`. This item contains -information about the stream's oldest backing index, -`.ds-my-data-stream-2099.03.07-000001`. - -The following <> request copies documents from -`.ds-my-data-stream-2099.03.07-000001` to `new-data-stream`. The request's -`op_type` is `create`. - -[source,console] ----- -POST /_reindex -{ - "source": { - "index": ".ds-my-data-stream-2099.03.07-000001" - }, - "dest": { - "index": "new-data-stream", - "op_type": "create" - } -} ----- -// TEST[setup:my_index] -// TEST[s/.ds-my-data-stream-2099.03.07-000001/my-index-000001/] --- -+ -You can also use a query to reindex only a subset of documents with each -request. -+ --- -The following <> request copies documents from -`my-data-stream` to `new-data-stream`. The request -uses a <> to only reindex documents with a -timestamp within the last week. Note the request's `op_type` is `create`. - -[source,console] ----- -POST /_reindex -{ - "source": { - "index": "my-data-stream", - "query": { - "range": { - "@timestamp": { - "gte": "now-7d/d", - "lte": "now/d" - } - } - } - }, - "dest": { - "index": "new-data-stream", - "op_type": "create" - } -} ----- --- - -. If you previously changed your {ilm-init} poll interval, change it back to its -original value when reindexing is complete. This prevents unnecessary load on -the master node. -+ --- -The following cluster update settings API request resets the -`indices.lifecycle.poll_interval` setting to its default value. - -[source,console] ----- -PUT /_cluster/settings -{ - "persistent": { - "indices.lifecycle.poll_interval": null - } -} ----- --- - -. Resume indexing using the new data stream. Searches on this stream will now -query your new data and the reindexed data. - -. 
Once you have verified that all reindexed data is available in the new -data stream, you can safely remove the old stream. -+ --- -The following <> request -deletes `my-data-stream`. This request also deletes the stream's -backing indices and any data they contain. - -[source,console] ----- -DELETE /_data_stream/my-data-stream ----- --- - -[discrete] -[[data-streams-change-alias]] -=== Update or add an alias to a data stream - -Use the <> to update an existing data stream's -aliases. Changing an existing data stream's aliases in its index pattern has no -effect. - -include::../alias.asciidoc[tag=alias-multiple-actions-example] diff --git a/docs/reference/data-streams/data-stream-apis.asciidoc b/docs/reference/data-streams/data-stream-apis.asciidoc deleted file mode 100644 index 8b952fad59f8d..0000000000000 --- a/docs/reference/data-streams/data-stream-apis.asciidoc +++ /dev/null @@ -1,68 +0,0 @@ -[role="xpack"] -[[data-stream-apis]] -== Data stream APIs - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-data-stream[Data stream APIs]. --- - -The following APIs are available for managing <>: - -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -[[data-stream-lifecycle-api]] -The following APIs are available for managing the built-in lifecycle of data streams: - -preview::[] - -* <> -preview:[] -* <> -preview:[] -* <> -preview:[] -* <> -preview:[] -* <> -preview:[] - -The following API is available for <>: - -* <> - - -For concepts and tutorials, see <>. - -include::{es-ref-dir}/indices/create-data-stream.asciidoc[] - -include::{es-ref-dir}/indices/delete-data-stream.asciidoc[] - -include::{es-ref-dir}/indices/get-data-stream.asciidoc[] - -include::{es-ref-dir}/indices/migrate-to-data-stream.asciidoc[] - -include::{es-ref-dir}/indices/data-stream-stats.asciidoc[] - -include::{es-ref-dir}/data-streams/promote-data-stream-api.asciidoc[] - -include::{es-ref-dir}/data-streams/modify-data-streams-api.asciidoc[] - -include::{es-ref-dir}/data-streams/lifecycle/apis/put-lifecycle.asciidoc[] - -include::{es-ref-dir}/data-streams/lifecycle/apis/get-lifecycle.asciidoc[] - -include::{es-ref-dir}/data-streams/lifecycle/apis/delete-lifecycle.asciidoc[] - -include::{es-ref-dir}/data-streams/lifecycle/apis/explain-lifecycle.asciidoc[] - -include::{es-ref-dir}/data-streams/lifecycle/apis/get-lifecycle-stats.asciidoc[] - -include::{es-ref-dir}/indices/downsample-data-stream.asciidoc[] diff --git a/docs/reference/data-streams/data-streams.asciidoc b/docs/reference/data-streams/data-streams.asciidoc deleted file mode 100644 index 1484e21febdb3..0000000000000 --- a/docs/reference/data-streams/data-streams.asciidoc +++ /dev/null @@ -1,161 +0,0 @@ -[role="xpack"] -[[data-streams]] -= Data streams -++++ -Data streams -++++ - -A data stream lets you store append-only time series -data across multiple indices while giving you a single named resource for -requests. Data streams are well-suited for logs, events, metrics, and other -continuously generated data. - -You can submit indexing and search requests directly to a data stream. The -stream automatically routes the request to backing indices that store the -stream's data. You can use <> to -automate the management of these backing indices. For example, you can use -{ilm-init} to automatically move older backing indices to less expensive -hardware and delete unneeded indices. {ilm-init} can help you reduce costs and -overhead as your data grows. 
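-
-For example, if a data stream named `my-data-stream` already exists (creating
-one requires a matching index template, as described in the setup
-documentation), indexing into it and searching it are plain index and search
-requests. The snippet below is a minimal sketch; every field other than the
-required `@timestamp` is illustrative:
-
-[source,console]
------
-POST /my-data-stream/_doc
-{
-  "@timestamp": "2099-03-07T11:04:05.000Z",
-  "message": "Login attempt failed"
-}
-
-GET /my-data-stream/_search
-{
-  "query": {
-    "match": { "message": "login" }
-  }
-}
------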
-
-
-[discrete]
-[[should-you-use-a-data-stream]]
-== Should you use a data stream?
-
-To determine whether you should use a data stream for your data, consider the format of
-the data and your expected interaction. A good candidate for using a data stream will match the
-following criteria:
-
-* Your data contains a timestamp field, or one could be automatically generated.
-* You mostly perform indexing requests, with occasional updates and deletes.
-* You index documents without an `_id`, or when indexing documents with an explicit `_id` you expect first-write-wins behavior.
-
-For most time series data use cases, a data stream will be a good fit. However, if you find that
-your data doesn't fit into these categories (for example, if you frequently send multiple documents
-using the same `_id` expecting last-write-wins), you may want to use an index alias with a write
-index instead. See documentation for <> for more information.
-
-Keep in mind that some features such as <> and
-<> require a data stream.
-
-[discrete]
-[[backing-indices]]
-== Backing indices
-
-A data stream consists of one or more <>, auto-generated
-backing indices.
-
-image::images/data-streams/data-streams-diagram.svg[align="center"]
-
-A data stream requires a matching <>. The
-template contains the mappings and settings used to configure the stream's
-backing indices.
-
-// tag::timestamp-reqs[]
-Every document indexed to a data stream must contain a `@timestamp` field,
-mapped as a <> or <> field type. If the
-index template doesn't specify a mapping for the `@timestamp` field, {es} maps
-`@timestamp` as a `date` field with default options.
-// end::timestamp-reqs[]
-
-The same index template can be used for multiple data streams. You cannot
-delete an index template in use by a data stream.
-
-The name pattern for the backing indices is an implementation detail and no
-intelligence should be derived from it. The only invariant that holds is that
-the backing index for each data stream generation will have a unique name.
-
-[discrete]
-[[data-stream-read-requests]]
-== Read requests
-
-When you submit a read request to a data stream, the stream routes the request
-to all its backing indices.
-
-image::images/data-streams/data-streams-search-request.svg[align="center"]
-
-[discrete]
-[[data-stream-write-index]]
-== Write index
-
-The most recently created backing index is the data stream’s write index.
-The stream adds new documents to this index only.
-
-image::images/data-streams/data-streams-index-request.svg[align="center"]
-
-You cannot add new documents to other backing indices, even by sending requests
-directly to the index.
-
-You also cannot perform operations on a write index that may hinder indexing,
-such as:
-
-* <>
-* <>
-* <>
-* <>
-
-[discrete]
-[[data-streams-rollover]]
-== Rollover
-
-A <> creates a new backing index that becomes
-the stream's new write index.
-
-We recommend using <> to automatically
-roll over data streams when the write index reaches a specified age or size.
-If needed, you can also <>
-a data stream.
-
-[discrete]
-[[data-streams-generation]]
-== Generation
-
-Each data stream tracks its generation: a six-digit, zero-padded integer starting at `000001`.
-
-When a backing index is created, the index is named using the following
-convention:
-
-[source,text]
------
-.ds---
------
-
-`` is the backing index's creation date. Backing indices with a
-higher generation contain more recent data. For example, the `web-server-logs`
-data stream has a generation of `34`. The stream's most recent backing index,
-created on 7 March 2099, is named `.ds-web-server-logs-2099.03.07-000034`.
-
-Some operations, such as a <> or
-<>, can change a backing index's name.
-These name changes do not remove a backing index from its data stream.
-
-The generation of the data stream can change without a new index being added to
-the data stream (e.g. when an existing backing index is shrunk). This means the
-backing indices for some generations will never exist.
-You should not derive any intelligence from the backing index names.
-
-[discrete]
-[[data-streams-append-only]]
-== Append-only (mostly)
-
-Data streams are designed for use cases where existing data is rarely updated. You cannot send
-update or deletion requests for existing documents directly to a data stream. However, you can still
-<> in a data stream by submitting
-requests directly to the document's backing index.
-
-If you need to update a larger number of documents in a data stream, you can use the
-<> and
-<> APIs.
-
-TIP: If you frequently send multiple documents using the same `_id` expecting last-write-wins, you
-may want to use an index alias with a write index instead. See
-<>.
-
-include::set-up-a-data-stream.asciidoc[]
-include::use-a-data-stream.asciidoc[]
-include::change-mappings-and-settings.asciidoc[]
-include::tsds.asciidoc[]
-include::logs.asciidoc[]
-include::lifecycle/index.asciidoc[]
diff --git a/docs/reference/data-streams/downsampling-dsl.asciidoc b/docs/reference/data-streams/downsampling-dsl.asciidoc
deleted file mode 100644
index 0981c62ead03e..0000000000000
--- a/docs/reference/data-streams/downsampling-dsl.asciidoc
+++ /dev/null
@@ -1,565 +0,0 @@
-[[downsampling-dsl]]
-=== Run downsampling using data stream lifecycle
-++++
-Run downsampling using data stream lifecycle
-++++
-
-This is a simplified example that allows you to see quickly how
-<> works as part of a data stream lifecycle to reduce the
-storage size of a sampled set of metrics. The example uses typical Kubernetes
-cluster monitoring data. To test out downsampling with data stream lifecycle, follow these steps:
-
-. Check the <>.
-. <>.
-. <>.
-. <>.
-. <>.
-. <>.
-
-[discrete]
-[[downsampling-dsl-prereqs]]
-==== Prerequisites
-
-Refer to <>.
-
-[discrete]
-[[downsampling-dsl-create-index-template]]
-==== Create an index template with data stream lifecycle
-
-This creates an index template for a basic data stream. The available parameters
-for an index template are described in detail in <>.
-
-For simplicity, in the time series mapping all `time_series_metric` parameters
-are set to type `gauge`, but the `counter` metric type may also be used. The
-`time_series_metric` values determine the kind of statistical representations
-that are used during downsampling.
-
-The index template includes a set of static <>: `host`, `namespace`, `node`, and `pod`. The time series dimensions
-are not changed by the downsampling process.
-
-To enable downsampling, this template includes a `lifecycle` section with a <> object. The `fixed_interval` parameter sets the downsampling interval at which you want to aggregate the original time series data. The `after` parameter specifies how much time should pass after the index is rolled over before downsampling is performed.
- -[source,console] ----- -PUT _index_template/datastream_template -{ - "index_patterns": [ - "datastream*" - ], - "data_stream": {}, - "template": { - "lifecycle": { - "downsampling": [ - { - "after": "1m", - "fixed_interval": "1h" - } - ] - }, - "settings": { - "index": { - "mode": "time_series" - } - }, - "mappings": { - "properties": { - "@timestamp": { - "type": "date" - }, - "kubernetes": { - "properties": { - "container": { - "properties": { - "cpu": { - "properties": { - "usage": { - "properties": { - "core": { - "properties": { - "ns": { - "type": "long" - } - } - }, - "limit": { - "properties": { - "pct": { - "type": "float" - } - } - }, - "nanocores": { - "type": "long", - "time_series_metric": "gauge" - }, - "node": { - "properties": { - "pct": { - "type": "float" - } - } - } - } - } - } - }, - "memory": { - "properties": { - "available": { - "properties": { - "bytes": { - "type": "long", - "time_series_metric": "gauge" - } - } - }, - "majorpagefaults": { - "type": "long" - }, - "pagefaults": { - "type": "long", - "time_series_metric": "gauge" - }, - "rss": { - "properties": { - "bytes": { - "type": "long", - "time_series_metric": "gauge" - } - } - }, - "usage": { - "properties": { - "bytes": { - "type": "long", - "time_series_metric": "gauge" - }, - "limit": { - "properties": { - "pct": { - "type": "float" - } - } - }, - "node": { - "properties": { - "pct": { - "type": "float" - } - } - } - } - }, - "workingset": { - "properties": { - "bytes": { - "type": "long", - "time_series_metric": "gauge" - } - } - } - } - }, - "name": { - "type": "keyword" - }, - "start_time": { - "type": "date" - } - } - }, - "host": { - "type": "keyword", - "time_series_dimension": true - }, - "namespace": { - "type": "keyword", - "time_series_dimension": true - }, - "node": { - "type": "keyword", - "time_series_dimension": true - }, - "pod": { - "type": "keyword", - "time_series_dimension": true - } - } - } - } - } - } -} ----- - -//// -[source,console] ----- -DELETE _index_template/* ----- -// TEST[continued] -//// - -[discrete] -[[downsampling-dsl-ingest-data]] -==== Ingest time series data - -Use a bulk API request to automatically create your TSDS and index a set of ten -documents. - -**Important:** Before running this bulk request you need to update the -timestamps to within three to five hours after your current time. That is, -search `2022-06-21T15` and replace with your present date, and adjust the hour -to your current time plus three hours. 
- -[source,console] ----- -PUT /datastream/_bulk?refresh -{"create": {}} -{"@timestamp":"2022-06-21T15:49:00Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":91153,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":463314616},"usage":{"bytes":307007078,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":585236},"rss":{"bytes":102728},"pagefaults":120901,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:45:50Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":124501,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":982546514},"usage":{"bytes":360035574,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1339884},"rss":{"bytes":381174},"pagefaults":178473,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:44:50Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":38907,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":862723768},"usage":{"bytes":379572388,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":431227},"rss":{"bytes":386580},"pagefaults":233166,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:44:40Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":86706,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":567160996},"usage":{"bytes":103266017,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1724908},"rss":{"bytes":105431},"pagefaults":233166,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:44:00Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":150069,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":639054643},"usage":{"bytes":265142477,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1786511},"rss":{"bytes":189235},"pagefaults":138172,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} 
-{"@timestamp":"2022-06-21T15:42:40Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":82260,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":854735585},"usage":{"bytes":309798052,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":924058},"rss":{"bytes":110838},"pagefaults":259073,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:42:10Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":153404,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":279586406},"usage":{"bytes":214904955,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1047265},"rss":{"bytes":91914},"pagefaults":302252,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:40:20Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":125613,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":822782853},"usage":{"bytes":100475044,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":2109932},"rss":{"bytes":278446},"pagefaults":74843,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:40:10Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":100046,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":567160996},"usage":{"bytes":362826547,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1986724},"rss":{"bytes":402801},"pagefaults":296495,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:38:30Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":40018,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":1062428344},"usage":{"bytes":265142477,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":2294743},"rss":{"bytes":340623},"pagefaults":224530,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} - ----- -// TEST[skip: timestamp values won't match an accepted range in the TSDS] - -[discrete] -[[downsampling-dsl-view-data-stream-state]] -==== View current state of data stream - -Now that you've created and added documents to the data stream, check to confirm -the current state of the new index. - -[source,console] ----- -GET _data_stream ----- -// TEST[skip: temporal_ranges and index names won't match] - -If the data stream lifecycle policy has not yet been applied, your results will be like the -following. Note the original `index_name`: `.ds-datastream-2024.04.29-000001`. 
-
-[source,console-result]
------
-{
-  "data_streams": [
-    {
-      "name": "datastream",
-      "timestamp_field": {
-        "name": "@timestamp"
-      },
-      "indices": [
-        {
-          "index_name": ".ds-datastream-2024.04.29-000001",
-          "index_uuid": "vUMNtCyXQhGdlo1BD-cGRw",
-          "managed_by": "Data stream lifecycle"
-        }
-      ],
-      "generation": 1,
-      "status": "GREEN",
-      "template": "datastream_template",
-      "lifecycle": {
-        "enabled": true,
-        "downsampling": [
-          {
-            "after": "1m",
-            "fixed_interval": "1h"
-          }
-        ]
-      },
-      "next_generation_managed_by": "Data stream lifecycle",
-      "hidden": false,
-      "system": false,
-      "allow_custom_routing": false,
-      "replicated": false,
-      "rollover_on_write": false,
-      "time_series": {
-        "temporal_ranges": [
-          {
-            "start": "2024-04-29T15:55:46.000Z",
-            "end": "2024-04-29T18:25:46.000Z"
-          }
-        ]
-      }
-    }
-  ]
-}
------
-// TEST[skip: some fields are removed for brevity]
-// TEST[continued]
-
-Next, run a search query:
-
-[source,console]
------
-GET datastream/_search
------
-// TEST[skip: timestamp values won't match]
-
-The query returns your ten newly added documents.
-
-[source,console-result]
------
-{
-  "took": 23,
-  "timed_out": false,
-  "_shards": {
-    "total": 1,
-    "successful": 1,
-    "skipped": 0,
-    "failed": 0
-  },
-  "hits": {
-    "total": {
-      "value": 10,
-      "relation": "eq"
-    },
-...
------
-// TEST[skip: some fields are removed for brevity]
-// TEST[continued]
-
-[discrete]
-[[downsampling-dsl-rollover]]
-==== Roll over the data stream
-
-Data stream lifecycle will automatically roll over the data stream and perform downsampling. This step is only needed to see the downsampling results within the scope of this tutorial.
-
-Roll over the data stream using the <>:
-
-[source,console]
------
-POST /datastream/_rollover/
------
-// TEST[continued]
-
-[discrete]
-[[downsampling-dsl-view-results]]
-==== View downsampling results
-
-By default, data stream lifecycle actions are executed every five minutes. Downsampling takes place after the index is rolled over and the <>
-has lapsed, because the source index is still expected to receive major writes until then. The index was rolled over in the previous step, but the end of its time series range is likely still in the future. Once the index's time series range is in the past, re-run the `GET _data_stream` request.
-
-[source,console]
------
-GET _data_stream
------
-// TEST[skip: temporal_ranges and index names won't match]
-
-After the data stream lifecycle action was executed, the original
-`.ds-datastream-2024.04.29-000001` index is replaced with a new, downsampled
-index, in this case `downsample-1h-.ds-datastream-2024.04.29-000001`.
-
-[source,console-result]
------
-{
-  "data_streams": [
-    {
-      "name": "datastream",
-      "timestamp_field": {
-        "name": "@timestamp"
-      },
-      "indices": [
-        {
-          "index_name": "downsample-1h-.ds-datastream-2024.04.29-000001",
-          "index_uuid": "VqXuShP4T8ODAOnWFcqitg",
-          "managed_by": "Data stream lifecycle"
-        },
-        {
-          "index_name": ".ds-datastream-2024.04.29-000002",
-          "index_uuid": "8gCeSdjUSWG-o-PeEAJ0jA",
-          "managed_by": "Data stream lifecycle"
-        }
-      ],
-...
------
-// TEST[skip: some fields are removed for brevity]
-// TEST[continued]
-
-Run a search query on the data stream (note that when querying downsampled indices there are <>).
-
-[source,console]
------
-GET datastream/_search
------
-// TEST[continued]
-
-The new downsampled index contains just one document that includes the `min`,
-`max`, `sum`, and `value_count` statistics based off of the original sampled
-metrics.
- -[source,console-result] ----- -{ - "took": 26, - "timed_out": false, - "_shards": { - "total": 2, - "successful": 2, - "skipped": 0, - "failed": 0 - }, - "hits": { - "total": { - "value": 1, - "relation": "eq" - }, - "max_score": 1, - "hits": [ - { - "_index": "downsample-1h-.ds-datastream-2024.04.29-000001", - "_id": "0eL0wMf38sl_s5JnAAABjyrMjoA", - "_score": 1, - "_source": { - "@timestamp": "2024-04-29T17:00:00.000Z", - "_doc_count": 10, - "kubernetes": { - "container": { - "cpu": { - "usage": { - "core": { - "ns": 12828317850 - }, - "limit": { - "pct": 0.0000277905 - }, - "nanocores": { - "min": 38907, - "max": 153404, - "sum": 992677, - "value_count": 10 - }, - "node": { - "pct": 0.0000277905 - } - } - }, - "memory": { - "available": { - "bytes": { - "min": 279586406, - "max": 1062428344, - "sum": 7101494721, - "value_count": 10 - } - }, - "majorpagefaults": 0, - "pagefaults": { - "min": 74843, - "max": 302252, - "sum": 2061071, - "value_count": 10 - }, - "rss": { - "bytes": { - "min": 91914, - "max": 402801, - "sum": 2389770, - "value_count": 10 - } - }, - "usage": { - "bytes": { - "min": 100475044, - "max": 379572388, - "sum": 2668170609, - "value_count": 10 - }, - "limit": { - "pct": 0.00009923134 - }, - "node": { - "pct": 0.017700378 - } - }, - "workingset": { - "bytes": { - "min": 431227, - "max": 2294743, - "sum": 14230488, - "value_count": 10 - } - } - }, - "name": "container-name-44", - "start_time": "2021-03-30T07:59:06.000Z" - }, - "host": "gke-apps-0", - "namespace": "namespace26", - "node": "gke-apps-0-0", - "pod": "gke-apps-0-0-0" - } - } - } - ] - } -} ----- -// TEST[skip: timestamp values won't match] -// TEST[continued] - -Use the <> to get statistics for -the data stream, including the storage size. - -[source,console] ----- -GET /_data_stream/datastream/_stats?human=true ----- -// TEST[continued] - -[source,console-result] ----- -{ - "_shards": { - "total": 4, - "successful": 4, - "failed": 0 - }, - "data_stream_count": 1, - "backing_indices": 2, - "total_store_size": "37.3kb", - "total_store_size_bytes": 38230, - "data_streams": [ - { - "data_stream": "datastream", - "backing_indices": 2, - "store_size": "37.3kb", - "store_size_bytes": 38230, - "maximum_timestamp": 1714410000000 - } - ] -} ----- -// TEST[skip: exact size may be different] -// TEST[continued] - -This example demonstrates how downsampling works as part of a data stream lifecycle to -reduce the storage size of metrics data as it becomes less current and less -frequently queried. - -//// -[source,console] ----- -DELETE _data_stream/* -DELETE _index_template/* ----- -// TEST[continued] -//// diff --git a/docs/reference/data-streams/downsampling-ilm.asciidoc b/docs/reference/data-streams/downsampling-ilm.asciidoc deleted file mode 100644 index 79af7225ed1ad..0000000000000 --- a/docs/reference/data-streams/downsampling-ilm.asciidoc +++ /dev/null @@ -1,558 +0,0 @@ -[[downsampling-ilm]] -=== Run downsampling with ILM -++++ -Run downsampling with ILM -++++ - -This is a simplified example that allows you to see quickly how -<> works as part of an ILM policy to reduce the -storage size of a sampled set of metrics. The example uses typical Kubernetes -cluster monitoring data. To test out downsampling with ILM, follow these steps: - -. Check the <>. -. <>. -. <>. -. <>. -. <>. - -[discrete] -[[downsampling-ilm-prereqs]] -==== Prerequisites - -Refer to <>. - -Before running this example you may want to try the -<> example. 
- -[discrete] -[[downsampling-ilm-policy]] -==== Create an index lifecycle policy - -Create an ILM policy for your time series data. While not required, an ILM -policy is recommended to automate the management of your time series data stream -indices. - -To enable downsampling, add a <> and set -<> to the downsampling interval at -which you want to aggregate the original time series data. - -In this example, an ILM policy is configured for the `hot` phase. The downsample -takes place after the index is rolled over and the <> -has lapsed as the source index is still expected to receive major writes until then. -{ilm-cap} will not proceed with any action that expects the index to not receive -writes anymore until the <> has -passed. The {ilm-cap} actions that wait on the end time before proceeding are: -- <> -- <> -- <> -- <> -- <> -- <> - -[source,console] ----- -PUT _ilm/policy/datastream_policy -{ - "policy": { - "phases": { - "hot": { - "actions": { - "rollover" : { - "max_age": "5m" - }, - "downsample": { - "fixed_interval": "1h" - } - } - } - } - } -} ----- - -[discrete] -[[downsampling-ilm-create-index-template]] -==== Create an index template - -This creates an index template for a basic data stream. The available parameters -for an index template are described in detail in <>. - -For simplicity, in the time series mapping all `time_series_metric` parameters -are set to type `gauge`, but the `counter` metric type may also be used. The -`time_series_metric` values determine the kind of statistical representations -that are used during downsampling. - -The index template includes a set of static <>: `host`, `namespace`, `node`, and `pod`. The time series dimensions -are not changed by the downsampling process. - -[source,console] ----- -PUT _index_template/datastream_template -{ - "index_patterns": [ - "datastream*" - ], - "data_stream": {}, - "template": { - "settings": { - "index": { - "mode": "time_series", - "number_of_replicas": 0, - "number_of_shards": 2 - }, - "index.lifecycle.name": "datastream_policy" - }, - "mappings": { - "properties": { - "@timestamp": { - "type": "date" - }, - "kubernetes": { - "properties": { - "container": { - "properties": { - "cpu": { - "properties": { - "usage": { - "properties": { - "core": { - "properties": { - "ns": { - "type": "long" - } - } - }, - "limit": { - "properties": { - "pct": { - "type": "float" - } - } - }, - "nanocores": { - "type": "long", - "time_series_metric": "gauge" - }, - "node": { - "properties": { - "pct": { - "type": "float" - } - } - } - } - } - } - }, - "memory": { - "properties": { - "available": { - "properties": { - "bytes": { - "type": "long", - "time_series_metric": "gauge" - } - } - }, - "majorpagefaults": { - "type": "long" - }, - "pagefaults": { - "type": "long", - "time_series_metric": "gauge" - }, - "rss": { - "properties": { - "bytes": { - "type": "long", - "time_series_metric": "gauge" - } - } - }, - "usage": { - "properties": { - "bytes": { - "type": "long", - "time_series_metric": "gauge" - }, - "limit": { - "properties": { - "pct": { - "type": "float" - } - } - }, - "node": { - "properties": { - "pct": { - "type": "float" - } - } - } - } - }, - "workingset": { - "properties": { - "bytes": { - "type": "long", - "time_series_metric": "gauge" - } - } - } - } - }, - "name": { - "type": "keyword" - }, - "start_time": { - "type": "date" - } - } - }, - "host": { - "type": "keyword", - "time_series_dimension": true - }, - "namespace": { - "type": "keyword", - "time_series_dimension": true - }, - "node": { - "type": 
"keyword", - "time_series_dimension": true - }, - "pod": { - "type": "keyword", - "time_series_dimension": true - } - } - } - } - } - } -} ----- -// TEST[continued] - -//// -[source,console] ----- -DELETE _index_template/* ----- -// TEST[continued] -//// - -[discrete] -[[downsampling-ilm-ingest-data]] -==== Ingest time series data - -Use a bulk API request to automatically create your TSDS and index a set of ten -documents. - -**Important:** Before running this bulk request you need to update the -timestamps to within three to five hours after your current time. That is, -search `2022-06-21T15` and replace with your present date, and adjust the hour -to your current time plus three hours. - -[source,console] ----- -PUT /datastream/_bulk?refresh -{"create": {}} -{"@timestamp":"2022-06-21T15:49:00Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":91153,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":463314616},"usage":{"bytes":307007078,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":585236},"rss":{"bytes":102728},"pagefaults":120901,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:45:50Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":124501,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":982546514},"usage":{"bytes":360035574,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1339884},"rss":{"bytes":381174},"pagefaults":178473,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:44:50Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":38907,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":862723768},"usage":{"bytes":379572388,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":431227},"rss":{"bytes":386580},"pagefaults":233166,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:44:40Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":86706,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":567160996},"usage":{"bytes":103266017,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1724908},"rss":{"bytes":105431},"pagefaults":233166,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} 
-{"@timestamp":"2022-06-21T15:44:00Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":150069,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":639054643},"usage":{"bytes":265142477,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1786511},"rss":{"bytes":189235},"pagefaults":138172,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:42:40Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":82260,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":854735585},"usage":{"bytes":309798052,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":924058},"rss":{"bytes":110838},"pagefaults":259073,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:42:10Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":153404,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":279586406},"usage":{"bytes":214904955,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1047265},"rss":{"bytes":91914},"pagefaults":302252,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:40:20Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":125613,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":822782853},"usage":{"bytes":100475044,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":2109932},"rss":{"bytes":278446},"pagefaults":74843,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:40:10Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":100046,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":567160996},"usage":{"bytes":362826547,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1986724},"rss":{"bytes":402801},"pagefaults":296495,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:38:30Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":40018,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":1062428344},"usage":{"bytes":265142477,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":2294743},"rss":{"bytes":340623},"pagefaults":224530,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} - ----- 
-// TEST[skip: The @timestamp value won't match an accepted range in the TSDS] - -[discrete] -[[downsampling-ilm-view-results]] -==== View the results - -Now that you've created and added documents to the data stream, check to confirm -the current state of the new index. - -[source,console] ----- -GET _data_stream ----- -// TEST[skip: The @timestamp value won't match an accepted range in the TSDS] - -If the ILM policy has not yet been applied, your results will be like the -following. Note the original `index_name`: `.ds-datastream--000001`. - -[source,console-result] ----- -{ - "data_streams": [ - { - "name": "datastream", - "timestamp_field": { - "name": "@timestamp" - }, - "indices": [ - { - "index_name": ".ds-datastream-2022.08.26-000001", - "index_uuid": "5g-3HrfETga-5EFKBM6R-w" - }, - { - "index_name": ".ds-datastream-2022.08.26-000002", - "index_uuid": "o0yRTdhWSo2pY8XMvfwy7Q" - } - ], - "generation": 2, - "status": "GREEN", - "template": "datastream_template", - "ilm_policy": "datastream_policy", - "hidden": false, - "system": false, - "allow_custom_routing": false, - "replicated": false, - "rollover_on_write": false, - "time_series": { - "temporal_ranges": [ - { - "start": "2022-08-26T13:29:07.000Z", - "end": "2022-08-26T19:29:07.000Z" - } - ] - } - } - ] -} ----- -// TEST[skip:todo] -// TEST[continued] - -Next, run a search query: - -[source,console] ----- -GET datastream/_search ----- -// TEST[skip: The @timestamp value won't match an accepted range in the TSDS] - -The query returns your ten newly added documents. - -[source,console-result] ----- -{ - "took": 17, - "timed_out": false, - "_shards": { - "total": 4, - "successful": 4, - "skipped": 0, - "failed": 0 - }, - "hits": { - "total": { - "value": 10, - "relation": "eq" - }, -... ----- -// TEST[skip:todo] -// TEST[continued] - -By default, index lifecycle management checks every ten minutes for indices that -meet policy criteria. Wait for about ten minutes (maybe brew up a quick coffee -or tea ☕ ) and then re-run the `GET _data_stream` request. - -[source,console] ----- -GET _data_stream ----- -// TEST[skip: The @timestamp value won't match an accepted range in the TSDS] - -After the ILM policy has taken effect, the original -`.ds-datastream-2022.08.26-000001` index is replaced with a new, downsampled -index, in this case `downsample-6tkn-.ds-datastream-2022.08.26-000001`. - -[source,console-result] ----- -{ - "data_streams": [ - { - "name": "datastream", - "timestamp_field": { - "name": "@timestamp" - }, - "indices": [ - { - "index_name": "downsample-6tkn-.ds-datastream-2022.08.26-000001", - "index_uuid": "qRane1fQQDCNgKQhXmTIvg" - }, - { - "index_name": ".ds-datastream-2022.08.26-000002", - "index_uuid": "o0yRTdhWSo2pY8XMvfwy7Q" - } - ], -... ----- -// TEST[skip:todo] -// TEST[continued] - -Run a search query on the datastream (note that when querying downsampled indices there are <>). - -[source,console] ----- -GET datastream/_search ----- -// TEST[continued] - -The new downsampled index contains just one document that includes the `min`, -`max`, `sum`, and `value_count` statistics based off of the original sampled -metrics. 
- -[source,console-result] ----- -{ - "took": 6, - "timed_out": false, - "_shards": { - "total": 4, - "successful": 4, - "skipped": 0, - "failed": 0 - }, - "hits": { - "total": { - "value": 1, - "relation": "eq" - }, - "max_score": 1, - "hits": [ - { - "_index": "downsample-6tkn-.ds-datastream-2022.08.26-000001", - "_id": "0eL0wC_4-45SnTNFAAABgtpz0wA", - "_score": 1, - "_source": { - "@timestamp": "2022-08-26T14:00:00.000Z", - "_doc_count": 10, - "kubernetes.host": "gke-apps-0", - "kubernetes.namespace": "namespace26", - "kubernetes.node": "gke-apps-0-0", - "kubernetes.pod": "gke-apps-0-0-0", - "kubernetes.container.cpu.usage.nanocores": { - "min": 38907, - "max": 153404, - "sum": 992677, - "value_count": 10 - }, - "kubernetes.container.memory.available.bytes": { - "min": 279586406, - "max": 1062428344, - "sum": 7101494721, - "value_count": 10 - }, - "kubernetes.container.memory.pagefaults": { - "min": 74843, - "max": 302252, - "sum": 2061071, - "value_count": 10 - }, - "kubernetes.container.memory.rss.bytes": { - "min": 91914, - "max": 402801, - "sum": 2389770, - "value_count": 10 - }, - "kubernetes.container.memory.usage.bytes": { - "min": 100475044, - "max": 379572388, - "sum": 2668170609, - "value_count": 10 - }, - "kubernetes.container.memory.workingset.bytes": { - "min": 431227, - "max": 2294743, - "sum": 14230488, - "value_count": 10 - }, - "kubernetes.container.cpu.usage.core.ns": 12828317850, - "kubernetes.container.cpu.usage.limit.pct": 0.000027790500098490156, - "kubernetes.container.cpu.usage.node.pct": 0.000027790500098490156, - "kubernetes.container.memory.majorpagefaults": 0, - "kubernetes.container.memory.usage.limit.pct": 0.00009923134348355234, - "kubernetes.container.memory.usage.node.pct": 0.017700377851724625, - "kubernetes.container.name": "container-name-44", - "kubernetes.container.start_time": "2021-03-30T07:59:06.000Z" - } - } - ] - } -} ----- -// TEST[skip:todo] -// TEST[continued] - -Use the <> to get statistics for -the data stream, including the storage size. - -[source,console] ----- -GET /_data_stream/datastream/_stats?human=true ----- -// TEST[continued] - -[source,console-result] ----- -{ - "_shards": { - "total": 4, - "successful": 4, - "failed": 0 - }, - "data_stream_count": 1, - "backing_indices": 2, - "total_store_size": "16.6kb", - "total_store_size_bytes": 17059, - "data_streams": [ - { - "data_stream": "datastream", - "backing_indices": 2, - "store_size": "16.6kb", - "store_size_bytes": 17059, - "maximum_timestamp": 1661522400000 - } - ] -} ----- -// TEST[skip:todo] -// TEST[continued] - -This example demonstrates how downsampling works as part of an ILM policy to -reduce the storage size of metrics data as it becomes less current and less -frequently queried. - -You can also try our <> -example to learn how downsampling can work outside of an ILM policy. 
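-
-TIP: Rather than waiting for the next {ilm-init} check, you can see how far each
-backing index has progressed through its lifecycle with the ILM explain API.
-This is a minimal sketch; the index pattern below targets this example's
-backing indices:
-
-[source,console]
------
-GET .ds-datastream-*/_ilm/explain
------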
- -//// -[source,console] ----- -DELETE _data_stream/* -DELETE _index_template/* -DELETE _ilm/policy/datastream_policy ----- -// TEST[continued] -//// diff --git a/docs/reference/data-streams/downsampling-manual.asciidoc b/docs/reference/data-streams/downsampling-manual.asciidoc deleted file mode 100644 index 44ae77d072034..0000000000000 --- a/docs/reference/data-streams/downsampling-manual.asciidoc +++ /dev/null @@ -1,638 +0,0 @@ -[[downsampling-manual]] -=== Run downsampling manually -++++ -Run downsampling manually -++++ - -//// -[source,console] ----- -DELETE _data_stream/my-data-stream -DELETE _index_template/my-data-stream-template -DELETE _ingest/pipeline/my-timestamp-pipeline ----- -// TEARDOWN -//// - -The recommended way to <> a <> is -<>. However, if -you're not using ILM, you can downsample a TSDS manually. This guide shows you -how, using typical Kubernetes cluster monitoring data. - -To test out manual downsampling, follow these steps: - -. Check the <>. -. <>. -. <>. -. <>. -. <>. - -[discrete] -[[downsampling-manual-prereqs]] -==== Prerequisites - -* Refer to the <>. -* It is not possible to downsample a <> directly, nor -multiple indices at once. It's only possible to downsample one time series index -(TSDS backing index). -* In order to downsample an index, it needs to be read-only. For a TSDS write -index, this means it needs to be rolled over and made read-only first. -* Downsampling uses UTC timestamps. -* Downsampling needs at least one metric field to exist in the time series -index. - -[discrete] -[[downsampling-manual-create-index]] -==== Create a time series data stream - -First, you'll create a TSDS. For simplicity, in the time series mapping all -`time_series_metric` parameters are set to type `gauge`, but -<> such as `counter` and `histogram` may also -be used. The `time_series_metric` values determine the kind of statistical -representations that are used during downsampling. - -The index template includes a set of static -<>: `host`, `namespace`, -`node`, and `pod`. The time series dimensions are not changed by the -downsampling process. 
- -[source,console] ----- -PUT _index_template/my-data-stream-template -{ - "index_patterns": [ - "my-data-stream*" - ], - "data_stream": {}, - "template": { - "settings": { - "index": { - "mode": "time_series", - "routing_path": [ - "kubernetes.namespace", - "kubernetes.host", - "kubernetes.node", - "kubernetes.pod" - ], - "number_of_replicas": 0, - "number_of_shards": 2 - } - }, - "mappings": { - "properties": { - "@timestamp": { - "type": "date" - }, - "kubernetes": { - "properties": { - "container": { - "properties": { - "cpu": { - "properties": { - "usage": { - "properties": { - "core": { - "properties": { - "ns": { - "type": "long" - } - } - }, - "limit": { - "properties": { - "pct": { - "type": "float" - } - } - }, - "nanocores": { - "type": "long", - "time_series_metric": "gauge" - }, - "node": { - "properties": { - "pct": { - "type": "float" - } - } - } - } - } - } - }, - "memory": { - "properties": { - "available": { - "properties": { - "bytes": { - "type": "long", - "time_series_metric": "gauge" - } - } - }, - "majorpagefaults": { - "type": "long" - }, - "pagefaults": { - "type": "long", - "time_series_metric": "gauge" - }, - "rss": { - "properties": { - "bytes": { - "type": "long", - "time_series_metric": "gauge" - } - } - }, - "usage": { - "properties": { - "bytes": { - "type": "long", - "time_series_metric": "gauge" - }, - "limit": { - "properties": { - "pct": { - "type": "float" - } - } - }, - "node": { - "properties": { - "pct": { - "type": "float" - } - } - } - } - }, - "workingset": { - "properties": { - "bytes": { - "type": "long", - "time_series_metric": "gauge" - } - } - } - } - }, - "name": { - "type": "keyword" - }, - "start_time": { - "type": "date" - } - } - }, - "host": { - "type": "keyword", - "time_series_dimension": true - }, - "namespace": { - "type": "keyword", - "time_series_dimension": true - }, - "node": { - "type": "keyword", - "time_series_dimension": true - }, - "pod": { - "type": "keyword", - "time_series_dimension": true - } - } - } - } - } - } -} ----- - -[discrete] -[[downsampling-manual-ingest-data]] -==== Ingest time series data - -Because time series data streams have been designed to -<>, in this example, you'll -use an ingest pipeline to time-shift the data as it gets indexed. As a result, -the indexed data will have an `@timestamp` from the last 15 minutes. 
- -Create the pipeline with this request: - -[source,console] ----- -PUT _ingest/pipeline/my-timestamp-pipeline -{ - "description": "Shifts the @timestamp to the last 15 minutes", - "processors": [ - { - "set": { - "field": "ingest_time", - "value": "{{_ingest.timestamp}}" - } - }, - { - "script": { - "lang": "painless", - "source": """ - def delta = ChronoUnit.SECONDS.between( - ZonedDateTime.parse("2022-06-21T15:49:00Z"), - ZonedDateTime.parse(ctx["ingest_time"]) - ); - ctx["@timestamp"] = ZonedDateTime.parse(ctx["@timestamp"]).plus(delta,ChronoUnit.SECONDS).toString(); - """ - } - } - ] -} ----- -// TEST[continued] - -Next, use a bulk API request to automatically create your TSDS and index a set -of ten documents: - -[source,console] ----- -PUT /my-data-stream/_bulk?refresh&pipeline=my-timestamp-pipeline -{"create": {}} -{"@timestamp":"2022-06-21T15:49:00Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":91153,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":463314616},"usage":{"bytes":307007078,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":585236},"rss":{"bytes":102728},"pagefaults":120901,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:45:50Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":124501,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":982546514},"usage":{"bytes":360035574,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1339884},"rss":{"bytes":381174},"pagefaults":178473,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:44:50Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":38907,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":862723768},"usage":{"bytes":379572388,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":431227},"rss":{"bytes":386580},"pagefaults":233166,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:44:40Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":86706,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":567160996},"usage":{"bytes":103266017,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1724908},"rss":{"bytes":105431},"pagefaults":233166,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} 
-{"@timestamp":"2022-06-21T15:44:00Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":150069,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":639054643},"usage":{"bytes":265142477,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1786511},"rss":{"bytes":189235},"pagefaults":138172,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:42:40Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":82260,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":854735585},"usage":{"bytes":309798052,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":924058},"rss":{"bytes":110838},"pagefaults":259073,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:42:10Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":153404,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":279586406},"usage":{"bytes":214904955,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1047265},"rss":{"bytes":91914},"pagefaults":302252,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:40:20Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":125613,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":822782853},"usage":{"bytes":100475044,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":2109932},"rss":{"bytes":278446},"pagefaults":74843,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:40:10Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":100046,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":567160996},"usage":{"bytes":362826547,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":1986724},"rss":{"bytes":402801},"pagefaults":296495,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} -{"create": {}} -{"@timestamp":"2022-06-21T15:38:30Z","kubernetes":{"host":"gke-apps-0","node":"gke-apps-0-0","pod":"gke-apps-0-0-0","container":{"cpu":{"usage":{"nanocores":40018,"core":{"ns":12828317850},"node":{"pct":2.77905e-05},"limit":{"pct":2.77905e-05}}},"memory":{"available":{"bytes":1062428344},"usage":{"bytes":265142477,"node":{"pct":0.01770037710617187},"limit":{"pct":9.923134671484496e-05}},"workingset":{"bytes":2294743},"rss":{"bytes":340623},"pagefaults":224530,"majorpagefaults":0},"start_time":"2021-03-30T07:59:06Z","name":"container-name-44"},"namespace":"namespace26"}} ----- -// 
TEST[continued] - -You can use the search API to check if the documents have been indexed -correctly: - -[source,console] ----- -GET /my-data-stream/_search ----- -// TEST[continued] - -Run the following aggregation on the data to calculate some interesting -statistics: - -[source,console] ----- -GET /my-data-stream/_search -{ - "size": 0, - "aggs": { - "tsid": { - "terms": { - "field": "_tsid" - }, - "aggs": { - "over_time": { - "date_histogram": { - "field": "@timestamp", - "fixed_interval": "1d" - }, - "aggs": { - "min": { - "min": { - "field": "kubernetes.container.memory.usage.bytes" - } - }, - "max": { - "max": { - "field": "kubernetes.container.memory.usage.bytes" - } - }, - "avg": { - "avg": { - "field": "kubernetes.container.memory.usage.bytes" - } - } - } - } - } - } - } -} ----- -// TEST[continued] - -[discrete] -[[downsampling-manual-run]] -==== Downsample the TSDS - -A TSDS can't be downsampled directly. You need to downsample its backing indices -instead. You can see the backing index for your data stream by running: - -[source,console] ----- -GET /_data_stream/my-data-stream ----- -// TEST[continued] - -This returns: - -[source,console-result] ----- -{ - "data_streams": [ - { - "name": "my-data-stream", - "timestamp_field": { - "name": "@timestamp" - }, - "indices": [ - { - "index_name": ".ds-my-data-stream-2023.07.26-000001", <1> - "index_uuid": "ltOJGmqgTVm4T-Buoe7Acg", - "prefer_ilm": true, - "managed_by": "Unmanaged" - } - ], - "generation": 1, - "status": "GREEN", - "next_generation_managed_by": "Unmanaged", - "prefer_ilm": true, - "template": "my-data-stream-template", - "hidden": false, - "system": false, - "allow_custom_routing": false, - "replicated": false, - "rollover_on_write": false, - "time_series": { - "temporal_ranges": [ - { - "start": "2023-07-26T09:26:42.000Z", - "end": "2023-07-26T13:26:42.000Z" - } - ] - } - } - ] -} ----- -// TESTRESPONSE[s/".ds-my-data-stream-2023.07.26-000001"/$body.data_streams.0.indices.0.index_name/] -// TESTRESPONSE[s/"ltOJGmqgTVm4T-Buoe7Acg"/$body.data_streams.0.indices.0.index_uuid/] -// TESTRESPONSE[s/"2023-07-26T09:26:42.000Z"/$body.data_streams.0.time_series.temporal_ranges.0.start/] -// TESTRESPONSE[s/"2023-07-26T13:26:42.000Z"/$body.data_streams.0.time_series.temporal_ranges.0.end/] -// TESTRESPONSE[s/"replicated": false/"replicated": false,"failure_store":{"enabled": false, "indices": [], "rollover_on_write": true}/] -<1> The backing index for this data stream. - -Before a backing index can be downsampled, the TSDS needs to be rolled over and -the old index needs to be made read-only. - -Roll over the TSDS using the <>: - -[source,console] ----- -POST /my-data-stream/_rollover/ ----- -// TEST[continued] - -Copy the name of the `old_index` from the response. In the following steps, -replace the index name with that of your `old_index`. - -The old index needs to be set to read-only mode. 
Run the following request: - -[source,console] ----- -PUT /.ds-my-data-stream-2023.07.26-000001/_block/write ----- -// TEST[skip:We don't know the index name at test time] - -Next, use the <> to downsample -the index, setting the time series interval to one hour: - -[source,console] ----- -POST /.ds-my-data-stream-2023.07.26-000001/_downsample/.ds-my-data-stream-2023.07.26-000001-downsample -{ - "fixed_interval": "1h" -} ----- -// TEST[skip:We don't know the index name at test time] - -Now you can <>, and replace the -original index with the downsampled one: - -[source,console] ----- -POST _data_stream/_modify -{ - "actions": [ - { - "remove_backing_index": { - "data_stream": "my-data-stream", - "index": ".ds-my-data-stream-2023.07.26-000001" - } - }, - { - "add_backing_index": { - "data_stream": "my-data-stream", - "index": ".ds-my-data-stream-2023.07.26-000001-downsample" - } - } - ] -} ----- -// TEST[skip:We don't know the index name at test time] - -You can now delete the old backing index. But be aware this will delete the -original data. Don't delete the index if you may need the original data in the -future. - -[discrete] -[[downsampling-manual-view-results]] -==== View the results - -Re-run the earlier search query (note that when querying downsampled indices -there are <>): - -[source,console] ----- -GET /my-data-stream/_search ----- -// TEST[skip:Because we've skipped the previous steps] - -The TSDS with the new downsampled backing index contains just one document. For -counters, this document would only have the last value. For gauges, the field -type is now `aggregate_metric_double`. You see the `min`, `max`, `sum`, and -`value_count` statistics based off of the original sampled metrics: - -[source,console-result] ----- -{ - "took": 2, - "timed_out": false, - "_shards": { - "total": 4, - "successful": 4, - "skipped": 0, - "failed": 0 - }, - "hits": { - "total": { - "value": 1, - "relation": "eq" - }, - "max_score": 1, - "hits": [ - { - "_index": ".ds-my-data-stream-2023.07.26-000001-downsample", - "_id": "0eL0wC_4-45SnTNFAAABiZHbD4A", - "_score": 1, - "_source": { - "@timestamp": "2023-07-26T11:00:00.000Z", - "_doc_count": 10, - "ingest_time": "2023-07-26T11:26:42.715Z", - "kubernetes": { - "container": { - "cpu": { - "usage": { - "core": { - "ns": 12828317850 - }, - "limit": { - "pct": 0.0000277905 - }, - "nanocores": { - "min": 38907, - "max": 153404, - "sum": 992677, - "value_count": 10 - }, - "node": { - "pct": 0.0000277905 - } - } - }, - "memory": { - "available": { - "bytes": { - "min": 279586406, - "max": 1062428344, - "sum": 7101494721, - "value_count": 10 - } - }, - "majorpagefaults": 0, - "pagefaults": { - "min": 74843, - "max": 302252, - "sum": 2061071, - "value_count": 10 - }, - "rss": { - "bytes": { - "min": 91914, - "max": 402801, - "sum": 2389770, - "value_count": 10 - } - }, - "usage": { - "bytes": { - "min": 100475044, - "max": 379572388, - "sum": 2668170609, - "value_count": 10 - }, - "limit": { - "pct": 0.00009923134 - }, - "node": { - "pct": 0.017700378 - } - }, - "workingset": { - "bytes": { - "min": 431227, - "max": 2294743, - "sum": 14230488, - "value_count": 10 - } - } - }, - "name": "container-name-44", - "start_time": "2021-03-30T07:59:06.000Z" - }, - "host": "gke-apps-0", - "namespace": "namespace26", - "node": "gke-apps-0-0", - "pod": "gke-apps-0-0-0" - } - } - } - ] - } -} ----- -// TEST[skip:Because we've skipped the previous step] - -Re-run the earlier aggregation. 
Even though the aggregation runs on the -downsampled TSDS that only contains 1 document, it returns the same results as -the earlier aggregation on the original TSDS. - -[source,console] ----- -GET /my-data-stream/_search -{ - "size": 0, - "aggs": { - "tsid": { - "terms": { - "field": "_tsid" - }, - "aggs": { - "over_time": { - "date_histogram": { - "field": "@timestamp", - "fixed_interval": "1d" - }, - "aggs": { - "min": { - "min": { - "field": "kubernetes.container.memory.usage.bytes" - } - }, - "max": { - "max": { - "field": "kubernetes.container.memory.usage.bytes" - } - }, - "avg": { - "avg": { - "field": "kubernetes.container.memory.usage.bytes" - } - } - } - } - } - } - } -} ----- -// TEST[skip:Because we've skipped the previous steps] - -This example demonstrates how downsampling can dramatically reduce the number of -documents stored for time series data, within whatever time boundaries you -choose. It's also possible to perform downsampling on already downsampled data, -to further reduce storage and associated costs, as the time series data ages and -the data resolution becomes less critical. - -The recommended way to downsample a TSDS is with ILM. To learn more, try the -<> example. diff --git a/docs/reference/data-streams/downsampling.asciidoc b/docs/reference/data-streams/downsampling.asciidoc deleted file mode 100644 index 10a0241cf0732..0000000000000 --- a/docs/reference/data-streams/downsampling.asciidoc +++ /dev/null @@ -1,243 +0,0 @@ -[[downsampling]] - -///// -[source,console] --------------------------------------------------- -DELETE _ilm/policy/my_policy --------------------------------------------------- -// TEST -// TEARDOWN -///// - -=== Downsampling a time series data stream - -Downsampling provides a method to reduce the footprint of your <> by storing it at reduced granularity. - -Metrics solutions collect large amounts of time series data that grow over time. -As that data ages, it becomes less relevant to the current state of the system. -The downsampling process rolls up documents within a fixed time interval into a -single summary document. Each summary document includes statistical -representations of the original data: the `min`, `max`, `sum` and `value_count` -for each metric. Data stream <> -are stored unchanged. - -Downsampling, in effect, lets you to trade data resolution and precision for -storage size. You can include it in an <> policy to automatically manage the volume and associated cost of -your metrics data at it ages. - -Check the following sections to learn more: - -* <> -* <> -* <> -* <> -* <> - -[discrete] -[[how-downsampling-works]] -=== How it works - -A <> is a sequence of observations taken over time for -a specific entity. The observed samples can be represented as a continuous -function, where the time series dimensions remain constant and the time series -metrics change over time. - -//.Sampling a continuous function -image::images/data-streams/time-series-function.png[align="center"] - -In an Elasticsearch index, a single document is created for each timestamp, -containing the immutable time series dimensions, together with the metrics names -and the changing metrics values. For a single timestamp, several time series -dimensions and metrics may be stored. - -//.Metric anatomy -image::images/data-streams/time-series-metric-anatomy.png[align="center"] - -For your most current and relevant data, the metrics series typically has a low -sampling time interval, so it's optimized for queries that require a high data -resolution. 
- -.Original metrics series -image::images/data-streams/time-series-original.png[align="center"] - -Downsampling works on older, less frequently accessed data by replacing the -original time series with both a data stream of a higher sampling interval and -statistical representations of that data. Where the original metrics samples may -have been taken, for example, every ten seconds, as the data ages you may choose -to reduce the sample granularity to hourly or daily. You may choose to reduce -the granularity of `cold` archival data to monthly or less. - -.Downsampled metrics series -image::images/data-streams/time-series-downsampled.png[align="center"] - -[discrete] -[[downsample-api-process]] -==== The downsampling process - -The downsampling operation traverses the source TSDS index and performs the -following steps: - -. Creates a new document for each value of the `_tsid` field and each -`@timestamp` value, rounded to the `fixed_interval` defined in the downsample -configuration. -. For each new document, copies all <> from the source index to the target index. Dimensions in a -TSDS are constant, so this is done only once per bucket. -. For each <> field, computes aggregations -for all documents in the bucket. Depending on the metric type of each metric -field a different set of pre-aggregated results is stored: - -** `gauge`: The `min`, `max`, `sum`, and `value_count` are stored; `value_count` -is stored as type `aggregate_metric_double`. -** `counter`: The `last_value` is stored. -. For all other fields, the most recent value is copied to the target index. - -[discrete] -[[downsample-api-mappings]] -==== Source and target index field mappings - -Fields in the target, downsampled index are created based on fields in the -original source index, as follows: - -. All fields mapped with the `time-series-dimension` parameter are created in -the target downsample index with the same mapping as in the source index. -. All fields mapped with the `time_series_metric` parameter are created -in the target downsample index with the same mapping as in the source -index. An exception is that for fields mapped as `time_series_metric: gauge` -the field type is changed to `aggregate_metric_double`. -. All other fields that are neither dimensions nor metrics (that is, label -fields), are created in the target downsample index with the same mapping -that they had in the source index. - -[discrete] -[[running-downsampling]] -=== Running downsampling on time series data - -To downsample a time series index, use the -<> and set `fixed_interval` to -the level of granularity that you'd like: - -include::../indices/downsample-data-stream.asciidoc[tag=downsample-example] - -To downsample time series data as part of ILM, include a -<> in your ILM policy and set `fixed_interval` -to the level of granularity that you'd like: - -[source,console] ----- -PUT _ilm/policy/my_policy -{ - "policy": { - "phases": { - "warm": { - "actions": { - "downsample" : { - "fixed_interval": "1h" - } - } - } - } - } -} ----- - -[discrete] -[[querying-downsampled-indices]] -=== Querying downsampled indices - -You can use the <> and <> -endpoints to query a downsampled index. Multiple raw data and downsampled -indices can be queried in a single request, and a single request can include -downsampled indices at different granularities (different bucket timespan). That -is, you can query data streams that contain downsampled indices with multiple -downsampling intervals (for example, `15m`, `1h`, `1d`). 
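For instance, the sketch below runs a fixed-interval `date_histogram` across such a data stream; the data stream name and the metric field follow the Kubernetes example used earlier in these pages and are purely illustrative:

[source,console]
----
GET my-data-stream/_search
{
  "size": 0,
  "aggs": {
    "per_hour": {
      "date_histogram": {
        "field": "@timestamp",
        "fixed_interval": "1h"
      },
      "aggs": {
        "max_memory": {
          "max": {
            "field": "kubernetes.container.memory.usage.bytes"
          }
        }
      }
    }
  }
}
----
// TEST[skip:illustration only]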
- -The result of a time based histogram aggregation is in a uniform bucket size and -each downsampled index returns data ignoring the downsampling time interval. For -example, if you run a `date_histogram` aggregation with `"fixed_interval": "1m"` -on a downsampled index that has been downsampled at an hourly resolution -(`"fixed_interval": "1h"`), the query returns one bucket with all of the data at -minute 0, then 59 empty buckets, and then a bucket with data again for the next -hour. - -[discrete] -[[querying-downsampled-indices-notes]] -==== Notes on downsample queries - -There are a few things to note about querying downsampled indices: - -* When you run queries in {kib} and through Elastic solutions, a normal -response is returned without notification that some of the queried indices are -downsampled. -* For -<>, -only `fixed_intervals` (and not calendar-aware intervals) are supported. -* Timezone support comes with caveats: - -** Date histograms at intervals that are multiples of an hour are based on -values generated at UTC. This works well for timezones that are on the hour, e.g. -+5:00 or -3:00, but requires offsetting the reported time buckets, e.g. -`2020-01-01T10:30:00.000` instead of `2020-03-07T10:00:00.000` for -timezone +5:30 (India), if downsampling aggregates values per hour. In this case, -the results include the field `downsampled_results_offset: true`, to indicate that -the time buckets are shifted. This can be avoided if a downsampling interval of 15 -minutes is used, as it allows properly calculating hourly values for the shifted -buckets. - -** Date histograms at intervals that are multiples of a day are similarly -affected, in case downsampling aggregates values per day. In this case, the -beginning of each day is always calculated at UTC when generated the downsampled -values, so the time buckets need to be shifted, e.g. reported as -`2020-03-07T19:00:00.000` instead of `2020-03-07T00:00:00.000` for timezone `America/New_York`. -The field `downsampled_results_offset: true` is added in this case too. - -** Daylight savings and similar peculiarities around timezones affect -reported results, as <> -for date histogram aggregation. Besides, downsampling at daily interval -hinders tracking any information related to daylight savings changes. - -[discrete] -[[downsampling-restrictions]] -=== Restrictions and limitations - -The following restrictions and limitations apply for downsampling: - -* Only indices in a <> are supported. - -* Data is downsampled based on the time dimension only. All other dimensions are -copied to the new index without any modification. - -* Within a data stream, a downsampled index replaces the original index and the -original index is deleted. Only one index can exist for a given time period. - -* A source index must be in read-only mode for the downsampling process to -succeed. Check the <> example for -details. - -* Downsampling data for the same period many times (downsampling of a -downsampled index) is supported. The downsampling interval must be a multiple of -the interval of the downsampled index. - -* Downsampling is provided as an ILM action. See <>. - -* The new, downsampled index is created on the data tier of the original index -and it inherits its settings (for example, the number of shards and replicas). - -* The numeric `gauge` and `counter` <> are -supported. - -* The downsampling configuration is extracted from the time series data stream -<>. The only additional -required setting is the downsampling `fixed_interval`. 
- -[discrete] -[[try-out-downsampling]] -=== Try it out - -To take downsampling for a test run, try our example of -<>. - -Downsampling can easily be added to your ILM policy. To learn how, try our -<> example. diff --git a/docs/reference/data-streams/lifecycle/apis/delete-lifecycle.asciidoc b/docs/reference/data-streams/lifecycle/apis/delete-lifecycle.asciidoc deleted file mode 100644 index 5222d33b5870b..0000000000000 --- a/docs/reference/data-streams/lifecycle/apis/delete-lifecycle.asciidoc +++ /dev/null @@ -1,94 +0,0 @@ -[[data-streams-delete-lifecycle]] -=== Delete the lifecycle of a data stream -++++ -Delete Data Stream Lifecycle -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-data-stream[Data stream APIs]. --- - -Deletes the <> from a set of data streams. - -[[delete-lifecycle-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `manage_data_stream_lifecycle` index privilege or higher to -use this API. For more information, see <>. - -[[data-streams-delete-lifecycle-request]] -==== {api-request-title} - -`DELETE _data_stream//_lifecycle` - -[[data-streams-delete-lifecycle-desc]] -==== {api-description-title} - -Deletes the lifecycle from the specified data streams. If multiple data streams are provided but at least one of them -does not exist, then the deletion of the lifecycle will fail for all of them and the API will respond with `404`. - -[[data-streams-delete-lifecycle-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) Comma-separated list of data streams used to limit the request. Supports wildcards (`*`). -To target all data streams use `*` or `_all`. - - -[role="child_attributes"] -[[delete-data-lifecycle-api-query-parms]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=ds-expand-wildcards] -+ -Defaults to `open`. 
- -[[data-streams-delete-lifecycle-example]] -==== {api-examples-title} - -//// - -[source,console] --------------------------------------------------- -PUT /_index_template/my-template -{ - "index_patterns" : ["my-data-stream*"], - "priority" : 1, - "data_stream": {}, - "template": { - "lifecycle" : { - "data_retention" : "7d" - } - } -} - -PUT /_data_stream/my-data-stream --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _data_stream/my-data-stream -DELETE _index_template/my-template --------------------------------------------------- -// TEARDOWN - -//// - -The following example deletes the lifecycle of `my-data-stream`: - -[source,console] --------------------------------------------------- -DELETE _data_stream/my-data-stream/_lifecycle --------------------------------------------------- - -When the policy is successfully deleted from all selected data streams, you receive the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged": true -} --------------------------------------------------- diff --git a/docs/reference/data-streams/lifecycle/apis/explain-lifecycle.asciidoc b/docs/reference/data-streams/lifecycle/apis/explain-lifecycle.asciidoc deleted file mode 100644 index 8289fb54d51bd..0000000000000 --- a/docs/reference/data-streams/lifecycle/apis/explain-lifecycle.asciidoc +++ /dev/null @@ -1,124 +0,0 @@ -[[data-streams-explain-lifecycle]] -=== Explain data stream lifecycle -++++ -Explain Data Stream Lifecycle -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-data-stream[Data stream APIs]. --- - -Retrieves the current <> status for one or more data stream backing indices. - -[[explain-lifecycle-api-prereqs]] -==== {api-prereq-title} - -If the {es} {security-features} are enabled, you must have at least the `manage_data_stream_lifecycle` index privilege or -`view_index_metadata` index privilege to use this API. For more information, see <>. - -[[data-streams-explain-lifecycle-request]] -==== {api-request-title} - -`GET /_lifecycle/explain` - -[[data-streams-explain-lifecycle-desc]] -==== {api-description-title} - -Retrieves information about the index or data stream's current data stream lifecycle state, such as -time since index creation, time since rollover, the lifecycle configuration -managing the index, or any error that {es} might've encountered during the lifecycle -execution. - -[[data-streams-explain-lifecycle-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) Comma-separated list of indices or data streams. - -[[data-streams-explain-lifecycle-query-params]] -==== {api-query-parms-title} - -`include_defaults`:: - (Optional, Boolean) Includes default configurations related to the lifecycle of the target. - Defaults to `false`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] - -[[data-streams-explain-lifecycle-example]] -==== {api-examples-title} - -If you want to retrieve the lifecycle state of all the backing indices of a data stream, you can use the data stream name. 
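For example, a minimal sketch that targets a hypothetical data stream named `metrics`, which expands to all of its backing indices:

[source,console]
----
GET metrics/_lifecycle/explain
----
// TEST[skip:illustration only]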
-For simplicity, the following example retrieves the lifecycle state of one backing index `.ds-metrics-2023.03.22-000001`: - -[source,console] --------------------------------------------------- -GET .ds-metrics-2023.03.22-000001/_lifecycle/explain --------------------------------------------------- -// TEST[skip:we're not setting up data stream lifecycle in these tests] - -If the index is managed by a data stream lifecycle `explain` will show the `managed_by_lifecycle` field -set to `true` and the rest of the response will contain information about the -lifecycle execution status for this index: - -[source,console-result] --------------------------------------------------- -{ - "indices": { - ".ds-metrics-2023.03.22-000001": { - "index" : ".ds-metrics-2023.03.22-000001", - "managed_by_lifecycle" : true, <1> - "index_creation_date_millis" : 1679475563571, <2> - "time_since_index_creation" : "843ms", <3> - "rollover_date_millis" : 1679475564293, <4> - "time_since_rollover" : "121ms", <5> - "lifecycle" : { }, <6> - "generation_time" : "121ms" <7> - } -} --------------------------------------------------- -// TESTRESPONSE[skip:the result is for illustrating purposes only] - -<1> Shows if the index is being managed by data stream lifecycle. If the index is not managed by -a data stream lifecycle the other fields will not be shown -<2> When the index was created, this timestamp is used to determine when to -rollover -<3> The time since the index creation (used for calculating when to rollover -the index via the `max_age`) -<4> When the index was rolled over. If the index was not rolled over this will not be -shown. -<5> The time since rollover. If the index was not rolled over this will not be shown. -<6> The lifecycle configuration that applies to this index (which is configured on the parent -data stream) -<7> The generation time of the index represents the time since the index started progressing -towards the user configurable / business specific parts of the lifecycle (e.g. retention). -The `generation_time` is calculated from the origination date if it exists, or from the -rollover date if it exists, or from the creation date if neither of the other two exist. -If the index is the write index the `generation_time` will not be reported because it is not -eligible for retention or other parts of the lifecycle. - -The `explain` will also report any errors related to the lifecycle execution for the target -index: - -[source,console-result] --------------------------------------------------- -{ - "indices": { - ".ds-metrics-2023.03.22-000001": { - "index" : ".ds-metrics-2023.03.22-000001", - "managed_by_lifecycle" : true, - "index_creation_date_millis" : 1679475563571, - "time_since_index_creation" : "843ms", - "lifecycle" : { - "enabled": true - }, - "error": "{\"type\":\"validation_exception\",\"reason\":\"Validation Failed: 1: this action would add [2] shards, but this cluster -currently has [4]/[3] maximum normal shards open;\"}" <1> - } -} --------------------------------------------------- -// TESTRESPONSE[skip:the result is for illustrating purposes only] - -<1> The target index could not be rolled over due to a limitation in the number of shards -allowed in the cluster. 
diff --git a/docs/reference/data-streams/lifecycle/apis/get-lifecycle-stats.asciidoc b/docs/reference/data-streams/lifecycle/apis/get-lifecycle-stats.asciidoc deleted file mode 100644 index 0fbe7de287f7b..0000000000000 --- a/docs/reference/data-streams/lifecycle/apis/get-lifecycle-stats.asciidoc +++ /dev/null @@ -1,97 +0,0 @@ -[[data-streams-get-lifecycle-stats]] -=== Get data stream lifecycle stats -++++ -Get Data Stream Lifecycle -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-data-stream[Data stream APIs]. --- - -Gets stats about the execution of <>. - -[[get-lifecycle-stats-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `monitor` or -`manage` <> to use this API. - -[[data-streams-get-lifecycle-stats-request]] -==== {api-request-title} - -`GET _lifecycle/stats` - -[[data-streams-get-lifecycle-stats-desc]] -==== {api-description-title} - -Gets stats about the execution of the data stream lifecycle. The data stream level stats include only stats about data streams -managed by the data stream lifecycle. - -[[get-lifecycle-stats-api-response-body]] -==== {api-response-body-title} - -`last_run_duration_in_millis`:: -(Optional, long) -The duration of the last data stream lifecycle execution. -`time_between_starts_in_millis`:: -(Optional, long) -The time passed between the start of the last two data stream lifecycle executions. This should amount approximately to -<>. -`data_stream_count`:: -(integer) -The count of data streams currently being managed by the data stream lifecycle. -`data_streams`:: -(array of objects) -Contains information about the retrieved data stream lifecycles. -+ -.Properties of objects in `data_streams` -[%collapsible%open] -==== -`name`:: -(string) -The name of the data stream. -`backing_indices_in_total`:: -(integer) -The count of the backing indices of this data stream that are managed by the data stream lifecycle. -`backing_indices_in_error`:: -(integer) -The count of the backing indices of this data stream that are managed by the data stream lifecycle and have encountered an error. 
-==== - -[[data-streams-get-lifecycle-stats-example]] -==== {api-examples-title} - -Let's retrieve the data stream lifecycle stats of a cluster that has already executed the lifecycle more than once: - -[source,console] --------------------------------------------------- -GET _lifecycle/stats?human&pretty --------------------------------------------------- -// TEST[skip:this is for demonstration purposes only, we cannot ensure that DSL has run] - -The response will look like the following: - -[source,console-result] --------------------------------------------------- -{ - "last_run_duration_in_millis": 2, - "last_run_duration": "2ms", - "time_between_starts_in_millis": 9998, - "time_between_starts": "9.99s", - "data_streams_count": 2, - "data_streams": [ - { - "name": "my-data-stream", - "backing_indices_in_total": 2, - "backing_indices_in_error": 0 - }, - { - "name": "my-other-stream", - "backing_indices_in_total": 2, - "backing_indices_in_error": 1 - } - ] -} --------------------------------------------------- diff --git a/docs/reference/data-streams/lifecycle/apis/get-lifecycle.asciidoc b/docs/reference/data-streams/lifecycle/apis/get-lifecycle.asciidoc deleted file mode 100644 index 57d63fee2ddc1..0000000000000 --- a/docs/reference/data-streams/lifecycle/apis/get-lifecycle.asciidoc +++ /dev/null @@ -1,179 +0,0 @@ -[[data-streams-get-lifecycle]] -=== Get the lifecycle of a data stream -++++ -Get Data Stream Lifecycle -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-data-stream[Data stream APIs]. --- - -Gets the <> of a set of <>. - -[[get-lifecycle-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have at least one of the `manage` -<>, the `manage_data_stream_lifecycle` index privilege, or the -`view_index_metadata` privilege to use this API. For more information, see <>. - -[[data-streams-get-lifecycle-request]] -==== {api-request-title} - -`GET _data_stream//_lifecycle` - -[[data-streams-get-lifecycle-desc]] -==== {api-description-title} - -Gets the lifecycle of the specified data streams. If multiple data streams are requested but at least one of them -does not exist, then the API will respond with `404` since at least one of the requested resources could not be retrieved. -If the requested data streams do not have a lifecycle configured they will still be included in the API response but the -`lifecycle` key will be missing. - -[[data-streams-get-lifecycle-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) Comma-separated list of data streams used to limit the request. Supports wildcards (`*`). -To target all data streams use `*` or `_all`. - -[role="child_attributes"] -[[get-data-lifecycle-api-query-parms]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=ds-expand-wildcards] -+ -Defaults to `open`. - -`include_defaults`:: -(Optional, Boolean) If `true`, return all default settings in the response. -Defaults to `false`. - -[role="child_attributes"] -[[get-lifecycle-api-response-body]] -==== {api-response-body-title} - -`data_streams`:: -(array of objects) -Contains information about retrieved data stream lifecycles. -+ -.Properties of objects in `data_streams` -[%collapsible%open] -==== -`name`:: -(string) -Name of the data stream. 
-`lifecycle`:: -(Optional, object) -+ -.Properties of `lifecycle` -[%collapsible%open] -===== -`data_retention`:: -(Optional, string) -If defined, it represents the retention requested by the data stream owner for this data stream. - -`effective_retention`:: -(Optional, string) -If defined, every document added to this data stream will be stored at least for this time frame. Any time after this -duration the document could be deleted. When empty, every document in this data stream will be stored indefinitely. The -effective retention is calculated as described in the <>. - -`retention_determined_by`:: -(Optional, string) -The source of the retention; it can be one of three values: `data_stream_configuration`, `default_retention`, or `max_retention`. - -`rollover`:: -(Optional, object) -The conditions that will trigger the rollover of a backing index, as configured by the cluster setting -`cluster.lifecycle.default.rollover`. This property is an implementation detail and will only be returned -when the query param `include_defaults` is set to `true`. The contents of this field are subject to change. -===== -==== - -`global_retention`:: -(object) -Contains the global max and default retention. When no global retention is configured, this will be an empty object. -+ -.Properties of `global_retention` -[%collapsible%open] -==== -`max_retention`:: -(Optional, string) -The effective retention of data streams managed by the data stream lifecycle cannot exceed this value. -`default_retention`:: -(Optional, string) -This will be the effective retention of data streams managed by the data stream lifecycle that do not specify `data_retention`.
-==== - -[[data-streams-get-lifecycle-example]] -==== {api-examples-title} - -//// - -[source,console] --------------------------------------------------- -PUT /_index_template/my-template -{ - "index_patterns" : ["my-data-stream*"], - "priority" : 1, - "data_stream": {}, - "template": { - "lifecycle" : { - "data_retention" : "7d" - } - } -} - -PUT /_data_stream/my-data-stream-1 -PUT /_data_stream/my-data-stream-2 --------------------------------------------------- -// TESTSETUP - -[source,console] --------------------------------------------------- -DELETE _data_stream/my-data-stream* -DELETE _index_template/my-template --------------------------------------------------- -// TEARDOWN - -//// - -Let's retrieve the lifecycles: - -[source,console] --------------------------------------------------- -GET _data_stream/my-data-stream*/_lifecycle --------------------------------------------------- - -The response will look like the following: - -[source,console-result] --------------------------------------------------- -{ - "data_streams": [ - { - "name": "my-data-stream-1", - "lifecycle": { - "enabled": true, - "data_retention": "7d", - "effective_retention": "7d", - "retention_determined_by": "data_stream_configuration" - } - }, - { - "name": "my-data-stream-2", - "lifecycle": { - "enabled": true, - "data_retention": "7d", - "effective_retention": "7d", - "retention_determined_by": "data_stream_configuration" - } - } - ], - "global_retention": {} -} --------------------------------------------------- diff --git a/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc b/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc deleted file mode 100644 index c5002cf4882e2..0000000000000 --- a/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc +++ /dev/null @@ -1,126 +0,0 @@ -[[data-streams-put-lifecycle]] -=== Set the lifecycle of a data stream -++++ -Put Data Stream Lifecycle -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-data-stream[Data stream APIs]. --- - -Configures the data stream <> for the targeted <>. - -[[put-lifecycle-api-prereqs]] -==== {api-prereq-title} - -If the {es} {security-features} are enabled, you must have the `manage_data_stream_lifecycle` index privilege or higher to use this API. -For more information, see <>. - -[[data-streams-put-lifecycle-request]] -==== {api-request-title} - -`PUT _data_stream//_lifecycle` - -[[data-streams-put-lifecycle-desc]] -==== {api-description-title} - -Configures the data stream lifecycle for the targeted data streams. If multiple data streams are provided but at least one of them -does not exist, then the update of the lifecycle will fail for all of them and the API will respond with `404`. - -[[data-streams-put-lifecycle-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) Comma-separated list of data streams used to limit the request. Supports wildcards (`*`). -To target all data streams use `*` or `_all`. - -[role="child_attributes"] -[[put-data-lifecycle-api-query-parms]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=ds-expand-wildcards] -+ -Defaults to `open`. - -[[put-lifecycle-api-request-body]] -==== {api-request-body-title} - -`lifecycle`:: -(Required, object) -+ -.Properties of `lifecycle` -[%collapsible%open] -==== -`data_retention`:: -(Optional, string) -If defined, every document added to this data stream will be stored at least for this time frame. 
Any time after this -duration the document could be deleted. When empty, every document in this data stream will be stored indefinitely. - -`enabled`:: -(Optional, boolean) -If defined, it turns data stream lifecycle on/off (`true`/`false`) for this data stream. -A data stream lifecycle that's disabled (`enabled: false`) will have no effect on the -data stream. Defaults to `true`. - -`downsampling`:: -(Optional, array) -An optional array of downsampling configuration objects, each defining an `after` -interval representing when the backing index is meant to be downsampled (the time -frame is calculated since the index was rolled over, i.e. generation time) and -a `fixed_interval` representing the downsampling interval (the minimum `fixed_interval` -value is `5m`). A maximum number of 10 downsampling rounds can be configured. -See <> below. - -==== - -[[data-streams-put-lifecycle-example]] -==== {api-examples-title} - -The following example sets the lifecycle of `my-data-stream`: - -[source,console] --------------------------------------------------- -PUT _data_stream/my-data-stream/_lifecycle -{ - "data_retention": "7d" -} --------------------------------------------------- -// TEST[setup:my_data_stream] -// TEST[teardown:data_stream_cleanup] - -When the lifecycle is successfully updated in all data streams, you receive the following result: - -[source,console-result] --------------------------------------------------- -{ - "acknowledged": true -} --------------------------------------------------- - -[[data-streams-put-lifecycle-downsampling-example]] -==== {api-examples-title} - -The following example configures two downsampling rounds, the first one starting -one day after the backing index is rolled over (or later, if the index is still -within its write-accepting <>) with an interval -of `10m`, and a second round starting 7 days after rollover at an interval of `1d`: - -[source,console] --------------------------------------------------------------------- -PUT _data_stream/my-weather-sensor-data-stream/_lifecycle -{ - "downsampling": [ - { - "after": "1d", - "fixed_interval": "10m" - }, - { - "after": "7d", - "fixed_interval": "1d" - } - ] -} --------------------------------------------------------------------- -//TEST[skip:downsampling requires waiting for indices to be out of time bounds] diff --git a/docs/reference/data-streams/lifecycle/index.asciidoc b/docs/reference/data-streams/lifecycle/index.asciidoc deleted file mode 100644 index e4d5acfb704d3..0000000000000 --- a/docs/reference/data-streams/lifecycle/index.asciidoc +++ /dev/null @@ -1,82 +0,0 @@ -[role="xpack"] -[[data-stream-lifecycle]] -== Data stream lifecycle - -A data stream lifecycle is the built-in mechanism data streams use to manage their lifecycle. It enables you to easily -automate the management of your data streams according to your retention requirements. For example, you could configure -the lifecycle to: - -* Ensure that data indexed in the data stream will be kept at least for the retention time you defined. -* Ensure that data older than the retention period will be deleted automatically by {es} at a later time. - -To achieve that, it supports: - -* Automatic <>, which chunks your incoming data in smaller pieces to facilitate better performance -and backwards incompatible mapping changes. -* Configurable retention, which allows you to configure the time period for which your data is guaranteed to be stored. -{es} is allowed at a later time to delete data older than this time period. 
Retention can be configured on the data stream level -or on a global level. Read more about the different options in this <>. - -A data stream lifecycle also supports downsampling the data stream backing indices. -See <> for -more details. - -[discrete] -[[data-streams-lifecycle-how-it-works]] -=== How does it work? - -In intervals configured by <>, {es} goes over -each data stream and performs the following steps: - -1. Checks if the data stream has a data stream lifecycle configured, skipping any indices not part of a managed data stream. -2. Rolls over the write index of the data stream, if it fulfills the conditions defined by -<>. -3. After an index is not the write index anymore (i.e. the data stream has been rolled over), -automatically tail merges the index. Data stream lifecycle executes a merge operation that only targets -the long tail of small segments instead of the whole shard. As the segments are organised -into tiers of exponential sizes, merging the long tail of small segments is only a -fraction of the cost of force merging to a single segment. The small segments would usually -hold the most recent data so tail merging will focus the merging resources on the higher-value -data that is most likely to keep being queried. -4. If <> is configured it will execute -all the configured downsampling rounds. -5. Applies retention to the remaining backing indices. This means deleting the backing indices whose -`generation_time` is longer than the effective retention period (read more about the -<>). The `generation_time` is only applicable to rolled -over backing indices and it is either the time since the backing index got rolled over, or the time optionally configured -in the <> setting. - -IMPORTANT: We use the `generation_time` instead of the creation time because this ensures that all data in the backing -index have passed the retention period. As a result, the retention period is not the exact time data gets deleted, but -the minimum time data will be stored. - -NOTE: Steps `2-4` apply only to backing indices that are not already managed by {ilm-init}, meaning that these indices either do -not have an {ilm-init} policy defined, or if they do, they have <> -set to `false`. - -[discrete] -[[data-stream-lifecycle-configuration]] -=== Configuring data stream lifecycle - -Since the lifecycle is configured on the data stream level, the process to configure a lifecycle on a new data stream and -on an existing one differ. - -In the following sections, we will go through the following tutorials: - -* To create a new data stream with a lifecycle, you need to add the data stream lifecycle as part of the index template -that matches the name of your data stream (see <>). When a write operation -with the name of your data stream reaches {es} then the data stream will be created with the respective data stream lifecycle. -* To update the lifecycle of an existing data stream you need to use the <> -to edit the lifecycle on the data stream itself (see <>). -* Migrate an existing {ilm-init} managed data stream to Data stream lifecycle using <>. - -NOTE: Updating the data stream lifecycle of an existing data stream is different from updating the settings or the mapping, -because it is applied on the data stream level and not on the individual backing indices. 
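Any lifecycle change is picked up on the next lifecycle run, whose cadence is governed by the poll interval mentioned above. As a sketch, assuming the `data_streams.lifecycle.poll_interval` cluster setting, the cadence could be tuned like this (the `5m` value is only an illustration):

[source,console]
----
PUT _cluster/settings
{
  "persistent": {
    "data_streams.lifecycle.poll_interval": "5m"
  }
}
----
// TEST[skip:illustration only]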
- -include::tutorial-manage-new-data-stream.asciidoc[] - -include::tutorial-manage-existing-data-stream.asciidoc[] - -include::tutorial-manage-data-stream-retention.asciidoc[] - -include::tutorial-migrate-data-stream-from-ilm-to-dsl.asciidoc[] diff --git a/docs/reference/data-streams/lifecycle/tutorial-manage-data-stream-retention.asciidoc b/docs/reference/data-streams/lifecycle/tutorial-manage-data-stream-retention.asciidoc deleted file mode 100644 index a7f0379a45167..0000000000000 --- a/docs/reference/data-streams/lifecycle/tutorial-manage-data-stream-retention.asciidoc +++ /dev/null @@ -1,228 +0,0 @@ -[role="xpack"] -[[tutorial-manage-data-stream-retention]] -=== Tutorial: Data stream retention - -In this tutorial, we are going to go over the data stream lifecycle retention; we will define it, go over how it can be configured -and how it gets applied. Keep in mind that the following options apply only to data streams that are managed by the data stream lifecycle. - -. <> -. <> -. <> -. <> - -You can verify if a data stream is managed by the data stream lifecycle via the <>: - -//// -[source,console] ----- -PUT /_index_template/template -{ - "index_patterns": ["my-data-stream*"], - "template": { - "lifecycle": {} - }, - "data_stream": { } -} - -PUT /_data_stream/my-data-stream ----- -// TESTSETUP -//// - -//// -[source,console] ----- -DELETE /_data_stream/my-data-stream* -DELETE /_index_template/template -PUT /_cluster/settings -{ - "persistent" : { - "data_streams.lifecycle.retention.*" : null - } -} ----- -// TEARDOWN -//// - -[source,console] --------------------------------------------------- -GET _data_stream/my-data-stream/_lifecycle --------------------------------------------------- - -The result should look like this: - -[source,console-result] --------------------------------------------------- -{ - "data_streams": [ - { - "name": "my-data-stream", <1> - "lifecycle": { - "enabled": true <2> - } - } - ] -} --------------------------------------------------- -// TESTRESPONSE[skip:the result is for illustrating purposes only] -<1> The name of your data stream. -<2> Ensure that the lifecycle is enabled, meaning this should be `true`. - -[discrete] -[[what-is-retention]] -==== What is data stream retention? - -We define retention as the least amount of time the data of a data stream are going to be kept in {es}. After this time period -has passed, {es} is allowed to remove these data to free up space and/or manage costs. - -NOTE: Retention does not define the period that the data will be removed, but the minimum time period they will be kept. - -We define 4 different types of retention: - -* The data stream retention, or `data_retention`, which is the retention configured on the data stream level. It can be -set via an <> for future data streams or via the <> for an existing data stream. When the data stream retention is not set, it implies that the data -need to be kept forever. -* The global default retention, let's call it `default_retention`, which is a retention configured via the cluster setting -<> and will be -applied to all data streams managed by data stream lifecycle that do not have `data_retention` configured. Effectively, -it ensures that there will be no data streams keeping their data forever. This can be set via the -<>. -* The global max retention, let's call it `max_retention`, which is a retention configured via the cluster setting -<> and will be applied to -all data streams managed by data stream lifecycle.
Effectively, it ensures that there will be no data streams whose retention -will exceed this time period. This can be set via the <>. -* The effective retention, or `effective_retention`, which is the retention applied to a data stream at a given moment. -Effective retention cannot be set; it is derived by taking into account all the configured retention options listed above and is -calculated as described <>. - -NOTE: Global default and max retention do not apply to data streams internal to Elastic. Internal data streams are recognised - either by having the `system` flag set to `true` or if their name is prefixed with a dot (`.`). - -[discrete] -[[retention-configuration]] -==== How to configure retention? - -- By setting the `data_retention` on the data stream level. This retention can be configured in two ways: -+ --- For new data streams, it can be defined in the index template that would be applied during the data stream's creation. -You can use the <>, for example: -+ -[source,console] --------------------------------------------------- -PUT _index_template/template -{ - "index_patterns": ["my-data-stream*"], - "data_stream": { }, - "priority": 500, - "template": { - "lifecycle": { - "data_retention": "7d" - } - }, - "_meta": { - "description": "Template with data stream lifecycle" - } -} --------------------------------------------------- --- For an existing data stream, it can be set via the <>. -+ -[source,console] ----- -PUT _data_stream/my-data-stream/_lifecycle -{ - "data_retention": "30d" <1> -} ----- -// TEST[continued] -<1> The retention period of this data stream is set to 30 days. - -- By setting the global retention via the `data_streams.lifecycle.retention.default` and/or `data_streams.lifecycle.retention.max` -that are set on a cluster level. They can be set via the <>. For example: -+ -[source,console] --------------------------------------------------- -PUT /_cluster/settings -{ - "persistent" : { - "data_streams.lifecycle.retention.default" : "7d", - "data_streams.lifecycle.retention.max" : "90d" - } -} --------------------------------------------------- -// TEST[continued] - -[discrete] -[[effective-retention-calculation]] -==== How is the effective retention calculated? -The effective retention is calculated in the following way: - -- The `effective_retention` is the `default_retention`, when `default_retention` is defined and the data stream does not -have `data_retention`. -- The `effective_retention` is the `data_retention`, when `data_retention` is defined and, if `max_retention` is also defined, -it is less than the `max_retention`. -- The `effective_retention` is the `max_retention`, when `max_retention` is defined, and the data stream has either no -`data_retention` or its `data_retention` is greater than the `max_retention`.
- -The above is demonstrated in the examples below: - -|=== -|`default_retention` |`max_retention` |`data_retention` |`effective_retention` |Retention determined by - -|Not set |Not set |Not set |Infinite |N/A -|Not relevant |12 months |**30 days** |30 days |`data_retention` -|Not relevant |Not set |**30 days** |30 days |`data_retention` -|**30 days** |12 months |Not set |30 days |`default_retention` -|**30 days** |30 days |Not set |30 days |`default_retention` -|Not relevant |**30 days** |12 months |30 days |`max_retention` -|Not set |**30 days** |Not set |30 days |`max_retention` -|=== - -Considering our example, if we retrieve the lifecycle of `my-data-stream`: -[source,console] ----- -GET _data_stream/my-data-stream/_lifecycle ----- -// TEST[continued] - -We see that it will remain the same with what the user configured: -[source,console-result] ----- -{ - "global_retention" : { - "max_retention" : "90d", <1> - "default_retention" : "7d" <2> - }, - "data_streams": [ - { - "name": "my-data-stream", - "lifecycle": { - "enabled": true, - "data_retention": "30d", <3> - "effective_retention": "30d", <4> - "retention_determined_by": "data_stream_configuration" <5> - } - } - ] -} ----- -<1> The maximum retention configured in the cluster. -<2> The default retention configured in the cluster. -<3> The requested retention for this data stream. -<4> The retention that is applied by the data stream lifecycle on this data stream. -<5> The configuration that determined the effective retention. In this case it's the `data_configuration` because -it is less than the `max_retention`. - -[discrete] -[[effective-retention-application]] -==== How is the effective retention applied? - -Retention is applied to the remaining backing indices of a data stream as the last step of -<>. Data stream lifecycle will retrieve the backing indices -whose `generation_time` is longer than the effective retention period and delete them. The `generation_time` is only -applicable to rolled over backing indices and it is either the time since the backing index got rolled over, or the time -optionally configured in the <> setting. - -IMPORTANT: We use the `generation_time` instead of the creation time because this ensures that all data in the backing -index have passed the retention period. As a result, the retention period is not the exact time data get deleted, but -the minimum time data will be stored. diff --git a/docs/reference/data-streams/lifecycle/tutorial-manage-existing-data-stream.asciidoc b/docs/reference/data-streams/lifecycle/tutorial-manage-existing-data-stream.asciidoc deleted file mode 100644 index 56c26d42d3ffb..0000000000000 --- a/docs/reference/data-streams/lifecycle/tutorial-manage-existing-data-stream.asciidoc +++ /dev/null @@ -1,136 +0,0 @@ -[role="xpack"] -[[tutorial-manage-existing-data-stream]] -=== Tutorial: Update existing data stream - -To update the lifecycle of an existing data stream you do the following actions: - -. <> -. <> - -[discrete] -[[set-lifecycle]] -==== Set a data stream's lifecycle - -To add or to change the retention period of your data stream you can use the <>. - -* You can set infinite retention period, meaning that your data should never be deleted. For example: -+ -[source,console] ----- -PUT _data_stream/my-data-stream/_lifecycle -{ } <1> ----- -// TEST[setup:my_data_stream] -<1> An empty payload means that your data stream is still managed but the data will never be deleted. 
Managing a time -series data stream such as logs or metrics enables {es} to better store your data even if you do not use a retention period. - -* Or you can set the retention period of your choice. For example: -+ -[source,console] ----- -PUT _data_stream/my-data-stream/_lifecycle -{ - "data_retention": "30d" <1> -} ----- -// TEST[continued] -<1> The retention period of this data stream is set to 30 days. This means that {es} is allowed to delete data that is -older than 30 days at its own discretion. - -The changes in the lifecycle are applied on all backing indices of the data stream. You can see the effect of the change -via the <>: - -[source,console] --------------------------------------------------- -GET .ds-my-data-stream-*/_lifecycle/explain --------------------------------------------------- -// TEST[continued] - -The response will look like: - -[source,console-result] --------------------------------------------------- -{ - "indices": { - ".ds-my-data-stream-2023.04.19-000002": { - "index": ".ds-my-data-stream-2023.04.19-000002", <1> - "managed_by_lifecycle": true, <2> - "index_creation_date_millis": 1681919221417, - "time_since_index_creation": "6.85s", <3> - "lifecycle": { - "enabled": true, - "data_retention": "30d" <4> - } - }, - ".ds-my-data-stream-2023.04.17-000001": { - "index": ".ds-my-data-stream-2023.04.17-000001", <5> - "managed_by_lifecycle": true, <6> - "index_creation_date_millis": 1681745209501, - "time_since_index_creation": "48d", <7> - "rollover_date_millis": 1681919221419, - "time_since_rollover": "6.84s", <8> - "generation_time": "6.84s", <9> - "lifecycle": { - "enabled": true, - "data_retention": "30d" <10> - } - } - } -} --------------------------------------------------- -// TEST[continued] -// TESTRESPONSE[skip:the result is for illustrating purposes only] -<1> The name of the backing index. -<2> This index is managed by a data stream lifecycle. -<3> The time that has passed since this index has been created. -<4> The data retention for this index is at least 30 days, as it was recently updated. -<5> The name of the backing index. -<6> This index is managed by the built-in data stream lifecycle. -<7> The time that has passed since this index has been created. -<8> The time that has passed since this index was <>. -<9> The time that will be used to determine when it's safe to delete this index and all its data. -<10> The data retention for this index as well is at least 30 days, as it was recently updated. - -[discrete] -[[delete-lifecycle]] -==== Remove lifecycle for a data stream - -To remove the lifecycle of a data stream you can use the <>. As consequence, -the maintenance operations that were applied by the lifecycle will no longer be applied to the data stream and all its -backing indices. For example: - -[source,console] --------------------------------------------------- -DELETE _data_stream/my-data-stream/_lifecycle --------------------------------------------------- -// TEST[continued] - -You can then use the <> again to see that the indices are no longer managed. 
- -[source,console] --------------------------------------------------- -GET .ds-my-data-stream-*/_lifecycle/explain --------------------------------------------------- -// TEST[continued] -// TEST[teardown:data_stream_cleanup] - -[source,console-result] --------------------------------------------------- -{ - "indices": { - ".ds-my-data-stream-2023.04.19-000002": { - "index": ".ds-my-data-stream-2023.04.19-000002", <1> - "managed_by_lifecycle": false <2> - }, - ".ds-my-data-stream-2023.04.17-000001": { - "index": ".ds-my-data-stream-2023.04.19-000001", <3> - "managed_by_lifecycle": false <4> - } - } -} --------------------------------------------------- -// TESTRESPONSE[skip:the result is for illustrating purposes only] -<1> The name of the backing index. -<2> Indication that the index is not managed by the data stream lifecycle. -<3> The name of another backing index. -<4> Indication that the index is not managed by the data stream lifecycle. diff --git a/docs/reference/data-streams/lifecycle/tutorial-manage-new-data-stream.asciidoc b/docs/reference/data-streams/lifecycle/tutorial-manage-new-data-stream.asciidoc deleted file mode 100644 index 173b7a75dd28e..0000000000000 --- a/docs/reference/data-streams/lifecycle/tutorial-manage-new-data-stream.asciidoc +++ /dev/null @@ -1,153 +0,0 @@ -[role="xpack"] -[[tutorial-manage-new-data-stream]] -=== Tutorial: Create a data stream with a lifecycle - -To create a data stream with a built-in lifecycle, follow these steps: - -. <> -. <> -. <> - -[discrete] -[[create-index-template-with-lifecycle]] -==== Create an index template - -A data stream requires a matching <>. You can configure the data stream lifecycle by -setting the `lifecycle` field in the index template the same as you do for mappings and index settings. You can define an -index template that sets a lifecycle as follows: - -* Include the `data_stream` object to enable data streams. - -* Define the lifecycle in the template section or include a composable template that defines the lifecycle. - -* Use a priority higher than `200` to avoid collisions with built-in templates. -See <>. - -You can use the <>. - -[source,console] --------------------------------------------------- -PUT _index_template/my-index-template -{ - "index_patterns": ["my-data-stream*"], - "data_stream": { }, - "priority": 500, - "template": { - "lifecycle": { - "data_retention": "7d" - } - }, - "_meta": { - "description": "Template with data stream lifecycle" - } -} --------------------------------------------------- - -[discrete] -[[create-data-stream-with-lifecycle]] -==== Create a data stream - -You can create a data stream in two ways: - -. By manually creating the stream using the <>. The stream's name must -still match one of your template's index patterns. -+ -[source,console] --------------------------------------------------- -PUT _data_stream/my-data-stream --------------------------------------------------- -// TEST[continued] - -. By <> that -target the stream's name. This name must match one of your index template's index patterns. 
-+ -[source,console] --------------------------------------------------- -PUT my-data-stream/_bulk -{ "create":{ } } -{ "@timestamp": "2099-05-06T16:21:15.000Z", "message": "192.0.2.42 - - [06/May/2099:16:21:15 +0000] \"GET /images/bg.jpg HTTP/1.0\" 200 24736" } -{ "create":{ } } -{ "@timestamp": "2099-05-06T16:25:42.000Z", "message": "192.0.2.255 - - [06/May/2099:16:25:42 +0000] \"GET /favicon.ico HTTP/1.0\" 200 3638" } --------------------------------------------------- -// TEST[continued] - -[discrete] -[[retrieve-lifecycle-information]] -==== Retrieve lifecycle information - -You can use the <> to see the data stream lifecycle of your data stream and -the <> to see the exact state of each backing index. - -[source,console] --------------------------------------------------- -GET _data_stream/my-data-stream/_lifecycle --------------------------------------------------- -// TEST[continued] - -The result will look like this: - -[source,console-result] --------------------------------------------------- -{ - "data_streams": [ - { - "name": "my-data-stream", <1> - "lifecycle": { - "enabled": true, <2> - "data_retention": "7d", <3> - "effective_retention": "7d", <4> - "retention_determined_by": "data_stream_configuration" - } - } - ], - "global_retention": {} -} --------------------------------------------------- -<1> The name of your data stream. -<2> Shows if the data stream lifecycle is enabled for this data stream. -<3> The retention period of the data indexed in this data stream, as configured by the user. -<4> The retention period that will be applied by the data stream lifecycle. This means that the data in this data stream will - be kept at least for 7 days. After that {es} can delete it at its own discretion. - -If you want to see more information about how the data stream lifecycle is applied on individual backing indices use the -<>: - -[source,console] --------------------------------------------------- -GET .ds-my-data-stream-*/_lifecycle/explain --------------------------------------------------- -// TEST[continued] -The result will look like this: - -[source,console-result] --------------------------------------------------- -{ - "indices": { - ".ds-my-data-stream-2023.04.19-000001": { - "index": ".ds-my-data-stream-2023.04.19-000001", <1> - "managed_by_lifecycle": true, <2> - "index_creation_date_millis": 1681918009501, - "time_since_index_creation": "1.6m", <3> - "lifecycle": { <4> - "enabled": true, - "data_retention": "7d" - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[skip:the result is for illustrating purposes only] -<1> The name of the backing index. -<2> If it is managed by the built-in data stream lifecycle. -<3> Time since the index was created. -<4> The lifecycle configuration that is applied on this backing index. 
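
If you only need a quick overview of which backing indices exist and which system manages each of them, the get data stream API (`GET _data_stream/<name>`) reports a `managed_by` value per backing index. The request below is an illustrative sketch only; the optional `filter_path` query parameter is a generic response-filtering option used here just to trim the output and can be omitted:

[source,console]
----
GET _data_stream/my-data-stream?filter_path=data_streams.name,data_streams.indices,data_streams.next_generation_managed_by
----

Each entry under `indices` includes a `managed_by` value, so you can confirm at a glance that the backing indices are handled by the data stream lifecycle.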
- -////////////////////////// -[source,console] --------------------------------------------------- -DELETE _data_stream/my-data-stream -DELETE _index_template/my-index-template --------------------------------------------------- -// TEST[continued] - -////////////////////////// diff --git a/docs/reference/data-streams/lifecycle/tutorial-migrate-data-stream-from-ilm-to-dsl.asciidoc b/docs/reference/data-streams/lifecycle/tutorial-migrate-data-stream-from-ilm-to-dsl.asciidoc deleted file mode 100644 index a2c12466b7f2b..0000000000000 --- a/docs/reference/data-streams/lifecycle/tutorial-migrate-data-stream-from-ilm-to-dsl.asciidoc +++ /dev/null @@ -1,488 +0,0 @@ -[role="xpack"] -[[tutorial-migrate-data-stream-from-ilm-to-dsl]] -=== Tutorial: Migrate ILM managed data stream to data stream lifecycle - -In this tutorial we'll look at migrating an existing data stream from <> to -<>. The existing {ilm-init} managed backing indices will continue -to be managed by {ilm-init} until they age out and get deleted by {ilm-init}; however, -the new backing indices will be managed by data stream lifecycle. -This way, a data stream is gradually migrated away from being managed by {ilm-init} to -being managed by data stream lifecycle. As we'll see, {ilm-init} and data stream lifecycle -can co-manage a data stream; however, an index can only be managed by one system at -a time. - -[discrete] -[[migrate-dsl-ilm-tldr]] -==== TL;DR -To migrate a data stream from {ilm-init} to data stream lifecycle we'll have to execute -two steps: - -1. Update the index template that's backing the data stream to set <> -to `false`, and to configure data stream lifecycle. -2. Configure the data stream lifecycle for the _existing_ data stream using -the <>. - -For more details see the <> section. - -[discrete] -[[setup-test-data]] -==== Setup ILM managed data stream -Let's first create a data stream with two backing indices managed by {ilm-init}. -We first create an {ilm-init} policy: - -[source,console] ----- -PUT _ilm/policy/pre-dsl-ilm-policy -{ - "policy": { - "phases": { - "hot": { - "actions": { - "rollover": { - "max_primary_shard_size": "50gb" - } - } - }, - "delete": { - "min_age": "7d", - "actions": { - "delete": {} - } - } - } - } -} ----- - -And let's create an index template that'll back the data stream and configures {ilm-init}: - -[source,console] ----- -PUT _index_template/dsl-data-stream-template -{ - "index_patterns": ["dsl-data-stream*"], - "data_stream": { }, - "priority": 500, - "template": { - "settings": { - "index.lifecycle.name": "pre-dsl-ilm-policy" - } - } -} ----- -// TEST[continued] - -We'll now index a document targetting `dsl-data-stream` to create the data stream -and we'll also manually rollover the data stream to have another generation index created: - -[source,console] ----- -POST dsl-data-stream/_doc? 
-{ - "@timestamp": "2023-10-18T16:21:15.000Z", - "message": "192.0.2.42 - - [06/May/2099:16:21:15 +0000] \"GET /images/bg.jpg HTTP/1.0\" 200 24736" -} ----- -// TEST[continued] - -[source,console] ----- -POST dsl-data-stream/_rollover ----- -// TEST[continued] - -We'll use the <> API to inspect the state of -the data stream: - -[source,console] --------------------------------------------------- -GET _data_stream/dsl-data-stream --------------------------------------------------- -// TEST[continued] - -Inspecting the response we'll see that both backing indices are managed by {ilm-init} -and that the next generation index will also be managed by {ilm-init}: - -[source,console-result] ----- -{ - "data_streams": [ - { - "name": "dsl-data-stream", - "timestamp_field": { - "name": "@timestamp" - }, - "indices": [ - { - "index_name": ".ds-dsl-data-stream-2023.10.19-000001", <1> - "index_uuid": "xCEhwsp8Tey0-FLNFYVwSg", - "prefer_ilm": true, <2> - "ilm_policy": "pre-dsl-ilm-policy", <3> - "managed_by": "Index Lifecycle Management" <4> - }, - { - "index_name": ".ds-dsl-data-stream-2023.10.19-000002", - "index_uuid": "PA_JquKGSiKcAKBA8DJ5gw", - "prefer_ilm": true, - "ilm_policy": "pre-dsl-ilm-policy", - "managed_by": "Index Lifecycle Management" - } - ], - "generation": 2, - "status": "GREEN", - "template": "dsl-data-stream-template", - "next_generation_managed_by": "Index Lifecycle Management", <5> - "prefer_ilm": true, <6> - "ilm_policy": "pre-dsl-ilm-policy", <7> - "hidden": false, - "system": false, - "allow_custom_routing": false, - "replicated": false, - "rollover_on_write": false - } - ] -} ----- -// TESTRESPONSE[s/"index_name": ".ds-dsl-data-stream-2023.10.19-000001"/"index_name": $body.data_streams.0.indices.0.index_name/] -// TESTRESPONSE[s/"index_uuid": "xCEhwsp8Tey0-FLNFYVwSg"/"index_uuid": $body.data_streams.0.indices.0.index_uuid/] -// TESTRESPONSE[s/"index_name": ".ds-dsl-data-stream-2023.10.19-000002"/"index_name": $body.data_streams.0.indices.1.index_name/] -// TESTRESPONSE[s/"index_uuid": "PA_JquKGSiKcAKBA8DJ5gw"/"index_uuid": $body.data_streams.0.indices.1.index_uuid/] -// TESTRESPONSE[s/"status": "GREEN"/"status": "YELLOW","failure_store":{"enabled": false, "indices": [], "rollover_on_write": true}/] - -<1> The name of the backing index. -<2> For each backing index we display the value of the <> -configuration which will indicate if {ilm-init} takes precedence over data stream lifecycle in case -both systems are configured for an index. -<3> The {ilm-init} policy configured for this index. -<4> The system that manages this index (possible values are "Index Lifecycle Management", -"Data stream lifecycle", or "Unmanaged") -<5> The system that will manage the next generation index (the new write index of this -data stream, once the data stream is rolled over). The possible values are -"Index Lifecycle Management", "Data stream lifecycle", or "Unmanaged". -<6> The <> value configured in the index template -that's backing the data stream. This value will be configured for all the new backing indices. -If it's not configured in the index template the backing indices will receive the `true` -default value ({ilm-init} takes precedence over data stream lifecycle by default as it's -currently richer in features). -<7> The {ilm-init} policy configured in the index template that's backing this data -stream (which will be configured on all the new backing indices, as long as it exists -in the index template). 
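
You can also cross-check the same information from the {ilm-init} side. As an illustrative sketch, assuming the backing index naming shown above, the ILM explain API reports the policy and current phase for every {ilm-init}-managed backing index:

[source,console]
----
GET .ds-dsl-data-stream-*/_ilm/explain
----

At this point both backing indices should report the `pre-dsl-ilm-policy`, matching the `managed_by` values shown in the response above.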
- -[discrete] -[[migrate-from-ilm-to-dsl]] -==== Migrate data stream to data stream lifecycle -To migrate the `dsl-data-stream` to data stream lifecycle we'll have to execute -two steps: - -1. Update the index template that's backing the data stream to set <> -to `false`, and to configure data stream lifecycle. -2. Configure the data stream lifecycle for the _existing_ `dsl-data-stream` using -the <>. - -IMPORTANT: The data stream lifecycle configuration that's added to the index template, -being a data stream configuration, will only apply to **new** data streams. -Our data stream exists already, so even though we added a data stream lifecycle -configuration in the index template it will not be applied to `dsl-data-stream`. - - -[[update-index-template-for-dsl]] -Let's update the index template: - -[source,console] ----- -PUT _index_template/dsl-data-stream-template -{ - "index_patterns": ["dsl-data-stream*"], - "data_stream": { }, - "priority": 500, - "template": { - "settings": { - "index.lifecycle.name": "pre-dsl-ilm-policy", - "index.lifecycle.prefer_ilm": false <1> - }, - "lifecycle": { - "data_retention": "7d" <2> - } - } -} ----- -// TEST[continued] - -<1> The `prefer_ilm` setting will now be configured on the **new** backing indices -(created by rolling over the data stream) such that {ilm-init} does _not_ take -precedence over data stream lifecycle. -<2> We're configuring the data stream lifecycle so _new_ data streams will be -managed by data stream lifecycle. - -We've now made sure that new data streams will be managed by data stream lifecycle. - -Let's update our existing `dsl-data-stream` and configure data stream lifecycle: - -[source,console] ----- -PUT _data_stream/dsl-data-stream/_lifecycle -{ - "data_retention": "7d" -} ----- -// TEST[continued] - -We can inspect the data stream to check that the next generation will indeed be -managed by data stream lifecycle: - -[source,console] --------------------------------------------------- -GET _data_stream/dsl-data-stream --------------------------------------------------- -// TEST[continued] - -[source,console-result] ----- -{ - "data_streams": [ - { - "name": "dsl-data-stream", - "timestamp_field": { - "name": "@timestamp" - }, - "indices": [ - { - "index_name": ".ds-dsl-data-stream-2023.10.19-000001", - "index_uuid": "xCEhwsp8Tey0-FLNFYVwSg", - "prefer_ilm": true, - "ilm_policy": "pre-dsl-ilm-policy", - "managed_by": "Index Lifecycle Management" <1> - }, - { - "index_name": ".ds-dsl-data-stream-2023.10.19-000002", - "index_uuid": "PA_JquKGSiKcAKBA8DJ5gw", - "prefer_ilm": true, - "ilm_policy": "pre-dsl-ilm-policy", - "managed_by": "Index Lifecycle Management" <2> - } - ], - "generation": 2, - "status": "GREEN", - "template": "dsl-data-stream-template", - "lifecycle": { - "enabled": true, - "data_retention": "7d", - "effective_retention": "7d", - "retention_determined_by": "data_stream_configuration" - }, - "ilm_policy": "pre-dsl-ilm-policy", - "next_generation_managed_by": "Data stream lifecycle", <3> - "prefer_ilm": false, <4> - "hidden": false, - "system": false, - "allow_custom_routing": false, - "replicated": false, - "rollover_on_write": false - } - ] -} ----- -// TESTRESPONSE[s/"index_name": ".ds-dsl-data-stream-2023.10.19-000001"/"index_name": $body.data_streams.0.indices.0.index_name/] -// TESTRESPONSE[s/"index_uuid": "xCEhwsp8Tey0-FLNFYVwSg"/"index_uuid": $body.data_streams.0.indices.0.index_uuid/] -// TESTRESPONSE[s/"index_name": ".ds-dsl-data-stream-2023.10.19-000002"/"index_name": 
$body.data_streams.0.indices.1.index_name/] -// TESTRESPONSE[s/"index_uuid": "PA_JquKGSiKcAKBA8DJ5gw"/"index_uuid": $body.data_streams.0.indices.1.index_uuid/] -// TESTRESPONSE[s/"status": "GREEN"/"status": "YELLOW","failure_store":{"enabled": false, "indices": [], "rollover_on_write": true}/] - -<1> The existing backing index will continue to be managed by {ilm-init} -<2> The existing backing index will continue to be managed by {ilm-init} -<3> The next generation index will be managed by Data stream lifecycle -<4> The `prefer_ilm` setting value we configured in the index template is reflected -and will be configured accordingly for new backing indices. - -We'll now rollover the data stream to see the new generation index being managed by -data stream lifecycle: - -[source,console] ----- -POST dsl-data-stream/_rollover ----- -// TEST[continued] - -[source,console] ----- -GET _data_stream/dsl-data-stream ----- -// TEST[continued] - -[source,console-result] ----- -{ - "data_streams": [ - { - "name": "dsl-data-stream", - "timestamp_field": { - "name": "@timestamp" - }, - "indices": [ - { - "index_name": ".ds-dsl-data-stream-2023.10.19-000001", - "index_uuid": "xCEhwsp8Tey0-FLNFYVwSg", - "prefer_ilm": true, - "ilm_policy": "pre-dsl-ilm-policy", - "managed_by": "Index Lifecycle Management" <1> - }, - { - "index_name": ".ds-dsl-data-stream-2023.10.19-000002", - "index_uuid": "PA_JquKGSiKcAKBA8DJ5gw", - "prefer_ilm": true, - "ilm_policy": "pre-dsl-ilm-policy", - "managed_by": "Index Lifecycle Management" <2> - }, - { - "index_name": ".ds-dsl-data-stream-2023.10.19-000003", - "index_uuid": "PA_JquKGSiKcAKBA8abcd1", - "prefer_ilm": false, <3> - "ilm_policy": "pre-dsl-ilm-policy", - "managed_by": "Data stream lifecycle" <4> - } - ], - "generation": 3, - "status": "GREEN", - "template": "dsl-data-stream-template", - "lifecycle": { - "enabled": true, - "data_retention": "7d", - "effective_retention": "7d", - "retention_determined_by": "data_stream_configuration" - }, - "ilm_policy": "pre-dsl-ilm-policy", - "next_generation_managed_by": "Data stream lifecycle", - "prefer_ilm": false, - "hidden": false, - "system": false, - "allow_custom_routing": false, - "replicated": false, - "rollover_on_write": false - } - ] -} ----- -// TESTRESPONSE[s/"index_name": ".ds-dsl-data-stream-2023.10.19-000001"/"index_name": $body.data_streams.0.indices.0.index_name/] -// TESTRESPONSE[s/"index_uuid": "xCEhwsp8Tey0-FLNFYVwSg"/"index_uuid": $body.data_streams.0.indices.0.index_uuid/] -// TESTRESPONSE[s/"index_name": ".ds-dsl-data-stream-2023.10.19-000002"/"index_name": $body.data_streams.0.indices.1.index_name/] -// TESTRESPONSE[s/"index_uuid": "PA_JquKGSiKcAKBA8DJ5gw"/"index_uuid": $body.data_streams.0.indices.1.index_uuid/] -// TESTRESPONSE[s/"index_name": ".ds-dsl-data-stream-2023.10.19-000003"/"index_name": $body.data_streams.0.indices.2.index_name/] -// TESTRESPONSE[s/"index_uuid": "PA_JquKGSiKcAKBA8abcd1"/"index_uuid": $body.data_streams.0.indices.2.index_uuid/] -// TESTRESPONSE[s/"status": "GREEN"/"status": "YELLOW","failure_store":{"enabled": false, "indices": [], "rollover_on_write": true}/] - -<1> The backing indices that existed before rollover will continue to be managed by {ilm-init} -<2> The backing indices that existed before rollover will continue to be managed by {ilm-init} -<3> The new write index received the `false` value for the `prefer_ilm` setting, as we configured -in the index template -<4> The new write index is managed by `Data stream lifecycle` - -[discrete] -[[migrate-from-dsl-to-ilm]] -==== 
Migrate data stream back to ILM -We can easily change this data stream to be managed by {ilm-init} because we didn't remove -the {ilm-init} policy when we <>. - -We can achieve this in two ways: - -1. <> from the data streams -2. Disable data stream lifecycle by configuring the `enabled` flag to `false`. - -Let's implement option 2 and disable the data stream lifecycle: - -[source,console] ----- -PUT _data_stream/dsl-data-stream/_lifecycle -{ - "data_retention": "7d", - "enabled": false <1> -} ----- -// TEST[continued] -<1> The `enabled` flag can be ommitted and defaults to `true` however, here we -explicitly configure it to `false` -Let's check the state of the data stream: - -[source,console] ----- -GET _data_stream/dsl-data-stream ----- -// TEST[continued] - -[source,console-result] ----- -{ - "data_streams": [ - { - "name": "dsl-data-stream", - "timestamp_field": { - "name": "@timestamp" - }, - "indices": [ - { - "index_name": ".ds-dsl-data-stream-2023.10.19-000001", - "index_uuid": "xCEhwsp8Tey0-FLNFYVwSg", - "prefer_ilm": true, - "ilm_policy": "pre-dsl-ilm-policy", - "managed_by": "Index Lifecycle Management" - }, - { - "index_name": ".ds-dsl-data-stream-2023.10.19-000002", - "index_uuid": "PA_JquKGSiKcAKBA8DJ5gw", - "prefer_ilm": true, - "ilm_policy": "pre-dsl-ilm-policy", - "managed_by": "Index Lifecycle Management" - }, - { - "index_name": ".ds-dsl-data-stream-2023.10.19-000003", - "index_uuid": "PA_JquKGSiKcAKBA8abcd1", - "prefer_ilm": false, - "ilm_policy": "pre-dsl-ilm-policy", - "managed_by": "Index Lifecycle Management" <1> - } - ], - "generation": 3, - "status": "GREEN", - "template": "dsl-data-stream-template", - "lifecycle": { - "enabled": false, <2> - "data_retention": "7d" - }, - "ilm_policy": "pre-dsl-ilm-policy", - "next_generation_managed_by": "Index Lifecycle Management", <3> - "prefer_ilm": false, - "hidden": false, - "system": false, - "allow_custom_routing": false, - "replicated": false, - "rollover_on_write": false - } - ] -} ----- -// TESTRESPONSE[s/"index_name": ".ds-dsl-data-stream-2023.10.19-000001"/"index_name": $body.data_streams.0.indices.0.index_name/] -// TESTRESPONSE[s/"index_uuid": "xCEhwsp8Tey0-FLNFYVwSg"/"index_uuid": $body.data_streams.0.indices.0.index_uuid/] -// TESTRESPONSE[s/"index_name": ".ds-dsl-data-stream-2023.10.19-000002"/"index_name": $body.data_streams.0.indices.1.index_name/] -// TESTRESPONSE[s/"index_uuid": "PA_JquKGSiKcAKBA8DJ5gw"/"index_uuid": $body.data_streams.0.indices.1.index_uuid/] -// TESTRESPONSE[s/"index_name": ".ds-dsl-data-stream-2023.10.19-000003"/"index_name": $body.data_streams.0.indices.2.index_name/] -// TESTRESPONSE[s/"index_uuid": "PA_JquKGSiKcAKBA8abcd1"/"index_uuid": $body.data_streams.0.indices.2.index_uuid/] -// TESTRESPONSE[s/"status": "GREEN"/"status": "YELLOW","failure_store":{"enabled": false, "indices": [], "rollover_on_write": true}/] -<1> The write index is now managed by {ilm-init} -<2> The `lifecycle` configured on the data stream is now disabled. -<3> The next write index will be managed by {ilm-init} - -Had we removed the {ilm-init} policy from the index template when we <> -it, the write index of the data stream will now be `Unmanaged` because the index -wouldn't have the {ilm-init} policy configured to fallback onto. 
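
For completeness, option 1 above (removing the lifecycle from the data stream entirely) is a single call to the delete data stream lifecycle API. A sketch for this tutorial's data stream:

[source,console]
----
DELETE _data_stream/dsl-data-stream/_lifecycle
----

Because the index template still references `pre-dsl-ilm-policy`, new backing indices would again fall back to being managed by {ilm-init} once the lifecycle is removed.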
- -////////////////////////// -[source,console] --------------------------------------------------- -DELETE _data_stream/dsl-data-stream -DELETE _index_template/dsl-data-stream-template -DELETE _ilm/policy/pre-dsl-ilm-policy --------------------------------------------------- -// TEST[continued] - -////////////////////////// - diff --git a/docs/reference/data-streams/logs.asciidoc b/docs/reference/data-streams/logs.asciidoc deleted file mode 100644 index 797efb7bef945..0000000000000 --- a/docs/reference/data-streams/logs.asciidoc +++ /dev/null @@ -1,245 +0,0 @@ -[[logs-data-stream]] -== Logs data stream - -IMPORTANT: The {es} `logsdb` index mode is generally available in Elastic Cloud Hosted -and self-managed Elasticsearch as of version 8.17, and is enabled by default for -logs in https://www.elastic.co/elasticsearch/serverless[{serverless-full}]. - -A logs data stream is a data stream type that stores log data more efficiently. - -In benchmarks, log data stored in a logs data stream used ~2.5 times less disk space than a regular data -stream. The exact impact varies by data set. - -[discrete] -[[how-to-use-logsds]] -=== Create a logs data stream - -To create a logs data stream, set your <> `index.mode` to `logsdb`: - -[source,console] ----- -PUT _index_template/my-index-template -{ - "index_patterns": ["logs-*"], - "data_stream": { }, - "template": { - "settings": { - "index.mode": "logsdb" <1> - } - }, - "priority": 101 <2> -} ----- -// TEST - -<1> The index mode setting. -<2> The index template priority. By default, Elasticsearch ships with a `logs-*-*` index template with a priority of 100. To make sure your index template takes priority over the default `logs-*-*` template, set its `priority` to a number higher than 100. For more information, see <>. - -After the index template is created, new indices that use the template will be configured as a logs data stream. You can start indexing data and <>. - -You can also set the index mode and adjust other template settings in <>. - -//// -[source,console] ----- -DELETE _index_template/my-index-template ----- -// TEST[continued] -//// - -[[logsdb-default-settings]] - -[discrete] -[[logsdb-synthetic-source]] -=== Synthetic source - -If you have the required https://www.elastic.co/subscriptions[subscription], `logsdb` index mode uses <>, which omits storing the original `_source` -field. Instead, the document source is synthesized from doc values or stored fields upon document retrieval. - -If you don't have the required https://www.elastic.co/subscriptions[subscription], `logsdb` mode uses the original `_source` field. - -Before using synthetic source, make sure to review the <>. - -When working with multi-value fields, the `index.mapping.synthetic_source_keep` setting controls how field values -are preserved for <> reconstruction. In `logsdb`, the default value is `arrays`, -which retains both duplicate values and the order of entries. However, the exact structure of -array elements and objects is not necessarily retained. Preserving duplicates and ordering can be critical for some -log fields, such as DNS A records, HTTP headers, and log entries that represent sequential or repeated events. - -[discrete] -[[logsdb-sort-settings]] -=== Index sort settings - -In `logsdb` index mode, indices are sorted by the fields `host.name` and `@timestamp` by default. - -* If the `@timestamp` field is not present, it is automatically injected. -* If the `host.name` field is not present, it is automatically injected as a `keyword` field, if possible. 
-** If `host.name` can't be injected (for example, `host` is a keyword field) or can't be used for sorting -(for example, its value is an IP address), only the `@timestamp` is used for sorting. -** If `host.name` is injected and `subobjects` is set to `true` (default), the `host` field is mapped as -an object field named `host` with a `name` child field of type `keyword`. If `subobjects` is set to `false`, -a single `host.name` field is mapped as a `keyword` field. -* To prioritize the latest data, `host.name` is sorted in ascending order and `@timestamp` is sorted in -descending order. - -You can override the default sort settings by manually configuring `index.sort.field` -and `index.sort.order`. For more details, see <>. - -To modify the sort configuration of an existing data stream, update the data stream's -component templates, and then perform or wait for a <>. - -NOTE: If you apply custom sort settings, the `@timestamp` field is injected into the mappings but is not -automatically added to the list of sort fields. For best results, include it manually as the last sort -field, with `desc` ordering. - -[discrete] -[[logsdb-host-name]] -==== Existing data streams - -If you're enabling `logsdb` index mode on a data stream that already exists, make sure to check mappings and sorting. The `logsdb` mode automatically maps `host.name` as a keyword if it's included in the sort settings. If a `host.name` field already exists but has a different type, mapping errors might occur, preventing `logsdb` mode from being fully applied. - -To avoid mapping conflicts, consider these options: - -* **Adjust mappings:** Check your existing mappings to ensure that `host.name` is mapped as a keyword. - -* **Change sorting:** If needed, you can remove `host.name` from the sort settings and use a different set of fields. Sorting by `@timestamp` can be a good fallback. - -* **Switch to a different <>**: If resolving `host.name` mapping conflicts is not feasible, you can choose not to use `logsdb` mode. - -IMPORTANT: On existing data streams, `logsdb` mode is applied on <> (automatic or manual). - -[discrete] -[[logsdb-sort-routing]] -==== Optimized routing on sort fields - -To reduce the storage footprint of `logsdb` indexes, you can enable routing optimizations. A routing optimization uses the fields in the sort configuration (except for `@timestamp`) to route documents to shards. - -In benchmarks, -routing optimizations reduced storage requirements by 20% compared to the default `logsdb` configuration, with a negligible penalty to ingestion -performance (1-4%). Routing optimizations can benefit data streams that are expected to grow substantially over -time. Exact results depend on the sort configuration and the nature of the logged data. - -To configure a routing optimization: - - * Include the index setting `[index.logsdb.route_on_sort_fields:true]` in the data stream configuration. - * <> with two or more fields, in addition to `@timestamp`. - * Make sure the <> field is not populated in ingested documents. It should be - auto-generated instead. - -A custom sort configuration is required, to improve storage efficiency and to minimize hotspots -from logging spikes that may route documents to a single shard. For best results, use a few sort fields -that have a relatively low cardinality and don't co-vary (for example, `host.name` and `host.id` are not optimal). 
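
As an illustration only, an index template that follows these guidelines might look like the sketch below. The template name, index pattern, and sort fields are assumptions chosen for the example, not requirements:

[source,console]
----
PUT _index_template/my-routed-logs-template
{
  "index_patterns": ["my-routed-logs-*"],            <1>
  "data_stream": { },
  "priority": 500,
  "template": {
    "settings": {
      "index.mode": "logsdb",
      "index.logsdb.route_on_sort_fields": true,     <2>
      "index.sort.field": [ "service.name", "host.name", "@timestamp" ],  <3>
      "index.sort.order": [ "asc", "asc", "desc" ]
    },
    "mappings": {
      "properties": {
        "service.name": { "type": "keyword" },
        "host.name": { "type": "keyword" }
      }
    }
  }
}
----
<1> The template name and index pattern are placeholders for this example.
<2> Enables routing on the sort fields.
<3> Two relatively low-cardinality sort fields in addition to `@timestamp`, which is kept as the last sort field with `desc` ordering, as recommended above.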
- -[discrete] -[[logsdb-specialized-codecs]] -=== Specialized codecs - -By default, `logsdb` index mode uses the `best_compression` <>, which applies {wikipedia}/Zstd[ZSTD] -compression to stored fields. You can switch to the `default` codec for faster compression with a slightly larger storage footprint. - -The `logsdb` index mode also automatically applies specialized codecs for numeric doc values, in order to optimize storage usage. Numeric fields are -encoded using the following sequence of codecs: - -* **Delta encoding**: - Stores the difference between consecutive values instead of the actual values. - -* **Offset encoding**: - Stores the difference from a base value rather than between consecutive values. - -* **Greatest Common Divisor (GCD) encoding**: - Finds the greatest common divisor of a set of values and stores the differences as multiples of the GCD. - -* **Frame Of Reference (FOR) encoding**: - Determines the smallest number of bits required to encode a block of values and uses - bit-packing to fit such values into larger 64-bit blocks. - -Each encoding is evaluated according to heuristics determined by the data distribution. -For example, the algorithm checks whether the data is monotonically non-decreasing or -non-increasing. If so, delta encoding is applied; otherwise, the process -continues with the next encoding method (offset). - -Encoding is specific to each Lucene segment and is reapplied when segments are merged. The merged Lucene segment -might use a different encoding than the original segments, depending on the characteristics of the merged data. - -For keyword fields, **Run Length Encoding (RLE)** is applied to the ordinals, which represent positions in the Lucene -segment-level keyword dictionary. This compression is used when multiple consecutive documents share the same keyword. - -[discrete] -[[logsdb-ignored-settings]] -=== `ignore` settings - -The `logsdb` index mode uses the following `ignore` settings. You can override these settings as needed. - -[discrete] -[[logsdb-ignore-malformed]] -==== `ignore_malformed` - -By default, `logsdb` index mode sets `ignore_malformed` to `true`. With this setting, documents with malformed fields -can be indexed without causing ingestion failures. - -[discrete] -[[logs-db-ignore-above]] -==== `ignore_above` - -In `logsdb` index mode, the `index.mapping.ignore_above` setting is applied by default at the index level to ensure -efficient storage and indexing of large keyword fields.The index-level default for `ignore_above` is 8191 -_characters._ Using UTF-8 encoding, this results in a limit of 32764 bytes, depending on character encoding. - -The mapping-level `ignore_above` setting takes precedence. If a specific field has an `ignore_above` value -defined in its mapping, that value overrides the index-level `index.mapping.ignore_above` value. This default -behavior helps to optimize indexing performance by preventing excessively large string values from being indexed. - -If you need to customize the limit, you can override it at the mapping level or change the index level default. - -[discrete] -[[logs-db-ignore-limit]] -==== `ignore_dynamic_beyond_limit` - -In `logsdb` index mode, the setting `index.mapping.total_fields.ignore_dynamic_beyond_limit` is set to `true` by -default. This setting allows dynamically mapped fields to be added on top of statically defined fields, even when the total number of fields exceeds the `index.mapping.total_fields.limit`. 
Instead of triggering an index failure, additional dynamically mapped fields are ignored so that ingestion can continue. - -NOTE: When automatically injected, `host.name` and `@timestamp` count toward the limit of mapped fields. If `host.name` is mapped with `subobjects: true`, it has two fields. When mapped with `subobjects: false`, `host.name` has only one field. - -[discrete] -[[logsdb-nodocvalue-fields]] -=== Fields without `doc_values` - -When the `logsdb` index mode uses synthetic `_source` and `doc_values` are disabled for a field in the mapping, -{es} might set the `store` setting to `true` for that field. This ensures that the field's -data remains accessible for reconstructing the document's source when using -<>. - -For example, this adjustment occurs with text fields when `store` is `false` and no suitable multi-field is available for -reconstructing the original value. - -[discrete] -[[logsdb-settings-summary]] -=== Settings reference - -The `logsdb` index mode uses the following settings: - -* **`index.mode`**: `"logsdb"` - -* **`index.mapping.synthetic_source_keep`**: `"arrays"` - -* **`index.sort.field`**: `["host.name", "@timestamp"]` - -* **`index.sort.order`**: `["desc", "desc"]` - -* **`index.sort.mode`**: `["min", "min"]` - -* **`index.sort.missing`**: `["_first", "_first"]` - -* **`index.codec`**: `"best_compression"` - -* **`index.mapping.ignore_malformed`**: `true` - -* **`index.mapping.ignore_above`**: `8191` - -* **`index.mapping.total_fields.ignore_dynamic_beyond_limit`**: `true` - -[discrete] -[[upgrade-to-logsdb-notes]] -=== Notes about upgrading to Logsdb - -TODO: add notes. diff --git a/docs/reference/data-streams/modify-data-streams-api.asciidoc b/docs/reference/data-streams/modify-data-streams-api.asciidoc deleted file mode 100644 index 2f717f9ec3b4b..0000000000000 --- a/docs/reference/data-streams/modify-data-streams-api.asciidoc +++ /dev/null @@ -1,84 +0,0 @@ -[[modify-data-streams-api]] -=== Modify data streams API -++++ -Modify data streams -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-data-stream[Data stream APIs]. --- - -Performs one or more <> modification actions in a single atomic -operation. - -[source,console] ----- -POST _data_stream/_modify -{ - "actions": [ - { - "remove_backing_index": { - "data_stream": "my-logs", - "index": ".ds-my-logs-2099.01.01-000001" - } - }, - { - "add_backing_index": { - "data_stream": "my-logs", - "index": "index-to-add" - } - } - ] -} ----- -// TEST[skip:cannot determine backing index name] - -[[modify-data-streams-api-request]] -==== {api-request-title} - -`POST /_data_stream/_modify` - -[role="child_attributes"] -[[modify-data-streams-api-request-body]] -==== {api-request-body-title} - -`actions`:: -(Required, array of objects) Actions to perform. -+ -.Properties of `actions` objects -[%collapsible%open] -==== -``:: -(Required, object) The key is the action type. At least one action is required. -+ -.Valid `` keys -[%collapsible%open] -===== -`add_backing_index`:: -Adds an existing index as a backing index for a data stream. The index is -hidden as part of this operation. - -WARNING: Adding indices with the `add_backing_index` action -can potentially result in improper data stream behavior. -This should be considered an expert level API. - -`remove_backing_index`:: -Removes a backing index from a data stream. The index is unhidden -as part of this operation. A data stream's write index cannot be removed. 
- -===== -+ -The object body contains options for the action. -+ -.Properties of `` -[%collapsible%open] -===== -`data_stream`:: -(Required*, string) Data stream targeted by the action. - -`index`:: -(Required*, string) Index for the action. -===== -==== diff --git a/docs/reference/data-streams/promote-data-stream-api.asciidoc b/docs/reference/data-streams/promote-data-stream-api.asciidoc deleted file mode 100644 index 33005e80e9408..0000000000000 --- a/docs/reference/data-streams/promote-data-stream-api.asciidoc +++ /dev/null @@ -1,53 +0,0 @@ -[role="xpack"] -[[promote-data-stream-api]] -=== Promote data stream API -++++ -Promote data stream -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-data-stream[Data stream APIs]. --- - -The purpose of the promote <> API is to turn -a data stream that is replicated by CCR into a regular -data stream. - -Via CCR Auto Following, a data stream from a remote cluster -can be replicated to the local cluster. These data streams -can't be rolled over in the local cluster. Only if the upstream -data stream rolls over then these replicated data streams roll -over as well. In the event that the remote cluster is no longer -available, the data stream in the local cluster can be promoted -to a regular data stream, which allows these data streams to -be rolled over in the local cluster. - -NOTE: When promoting a data stream, ensure the local cluster has a data stream enabled index template that matches the data stream. -If this is missing, the data stream will not be able to roll over until a matching index template is created. -This will affect the lifecycle management of the data stream and interfere with the data stream size and retention. - -[source,console] ----- -POST /_data_stream/_promote/my-data-stream ----- -// TEST[catch:missing] - -[[promote-data-stream-api-request]] -==== {api-request-title} - -`POST /_data_stream/_promote/` - -[[promote-data-stream-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `manage_follow_index` -<> to use this API. - -[[promote-data-stream-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) -The name of the data stream to promote. diff --git a/docs/reference/data-streams/set-up-a-data-stream.asciidoc b/docs/reference/data-streams/set-up-a-data-stream.asciidoc deleted file mode 100644 index a8cbbeac06077..0000000000000 --- a/docs/reference/data-streams/set-up-a-data-stream.asciidoc +++ /dev/null @@ -1,391 +0,0 @@ -[role="xpack"] -[[set-up-a-data-stream]] -== Set up a data stream - -To set up a data stream, follow these steps: - -. <> -. <> -. <> -. <> -. <> - -You can also <>. - -[IMPORTANT] --- -If you use {fleet}, {agent}, or {ls}, skip this tutorial. -They all set up data streams for you. - -For {fleet} and {agent}, check out this {fleet-guide}/data-streams.html[data streams documentation]. -For {ls}, check out the -{logstash-ref}/plugins-outputs-elasticsearch.html#plugins-outputs-elasticsearch-data_stream[data streams settings] -for the `elasticsearch output` plugin. --- - -[discrete] -[[create-index-lifecycle-policy]] -=== Create an index lifecycle policy - -While optional, we recommend using {ilm-init} to automate the management of your -data stream's backing indices. {ilm-init} requires an index lifecycle policy. - -To create an index lifecycle policy in {kib}, open the main menu and go to -*Stack Management > Index Lifecycle Policies*. Click *Create policy*. 
- -You can also use the <>. - -//// -[source,console] --------------------------------------------------- -PUT /_snapshot/found-snapshots -{ - "type": "fs", - "settings": { - "location": "my_backup_location" - } -} --------------------------------------------------- -// TESTSETUP -//// - -// tag::ilm-policy-api-ex[] -[source,console] ----- -PUT _ilm/policy/my-lifecycle-policy -{ - "policy": { - "phases": { - "hot": { - "actions": { - "rollover": { - "max_primary_shard_size": "50gb" - } - } - }, - "warm": { - "min_age": "30d", - "actions": { - "shrink": { - "number_of_shards": 1 - }, - "forcemerge": { - "max_num_segments": 1 - } - } - }, - "cold": { - "min_age": "60d", - "actions": { - "searchable_snapshot": { - "snapshot_repository": "found-snapshots" - } - } - }, - "frozen": { - "min_age": "90d", - "actions": { - "searchable_snapshot": { - "snapshot_repository": "found-snapshots" - } - } - }, - "delete": { - "min_age": "735d", - "actions": { - "delete": {} - } - } - } - } -} ----- -// end::ilm-policy-api-ex[] - -[discrete] -[[create-component-templates]] -=== Create component templates - -// tag::ds-create-component-templates[] -A data stream requires a matching index template. In most cases, you compose -this index template using one or more component templates. You typically use -separate component templates for mappings and index settings. This lets you -reuse the component templates in multiple index templates. - -When creating your component templates, include: - -* A <> or <> mapping for the `@timestamp` -field. If you don't specify a mapping, {es} maps `@timestamp` as a `date` field -with default options. - -* Your lifecycle policy in the `index.lifecycle.name` index setting. - -[TIP] -==== -Use the {ecs-ref}[Elastic Common Schema (ECS)] when mapping your fields. ECS -fields integrate with several {stack} features by default. - -If you're unsure how to map your fields, use <> to extract fields from <> at search time. For example, you can index a log message to a -`wildcard` field and later extract IP addresses and other data from this field -during a search. -==== - -To create a component template in {kib}, open the main menu and go to *Stack -Management > Index Management*. In the *Index Templates* view, click *Create -component template*. - -You can also use the <>. - -[source,console] ----- -# Creates a component template for mappings -PUT _component_template/my-mappings -{ - "template": { - "mappings": { - "properties": { - "@timestamp": { - "type": "date", - "format": "date_optional_time||epoch_millis" - }, - "message": { - "type": "wildcard" - } - } - } - }, - "_meta": { - "description": "Mappings for @timestamp and message fields", - "my-custom-meta-field": "More arbitrary metadata" - } -} - -# Creates a component template for index settings -PUT _component_template/my-settings -{ - "template": { - "settings": { - "index.lifecycle.name": "my-lifecycle-policy" - } - }, - "_meta": { - "description": "Settings for ILM", - "my-custom-meta-field": "More arbitrary metadata" - } -} ----- -// TEST[continued] -// end::ds-create-component-templates[] - -[discrete] -[[create-index-template]] -=== Create an index template - -// tag::ds-create-index-template[] -Use your component templates to create an index template. Specify: - -* One or more index patterns that match the data stream's name. We recommend -using our {fleet-guide}/data-streams.html#data-streams-naming-scheme[data stream -naming scheme]. - -* That the template is data stream enabled. 
- -* Any component templates that contain your mappings and index settings. - -* A priority higher than `200` to avoid collisions with built-in templates. -See <>. - -To create an index template in {kib}, open the main menu and go to *Stack -Management > Index Management*. In the *Index Templates* view, click *Create -template*. - -You can also use the <>. -Include the `data_stream` object to enable data streams. - -[source,console] ----- -PUT _index_template/my-index-template -{ - "index_patterns": ["my-data-stream*"], - "data_stream": { }, - "composed_of": [ "my-mappings", "my-settings" ], - "priority": 500, - "_meta": { - "description": "Template for my time series data", - "my-custom-meta-field": "More arbitrary metadata" - } -} ----- -// TEST[continued] -// end::ds-create-index-template[] - -[discrete] -[[create-data-stream]] -=== Create the data stream - -// tag::ds-create-data-stream[] -<> add documents to a data -stream. These requests must use an `op_type` of `create`. Documents must include -a `@timestamp` field. - -To automatically create your data stream, submit an indexing request that -targets the stream's name. This name must match one of your index template's -index patterns. - -[source,console] ----- -PUT my-data-stream/_bulk -{ "create":{ } } -{ "@timestamp": "2099-05-06T16:21:15.000Z", "message": "192.0.2.42 - - [06/May/2099:16:21:15 +0000] \"GET /images/bg.jpg HTTP/1.0\" 200 24736" } -{ "create":{ } } -{ "@timestamp": "2099-05-06T16:25:42.000Z", "message": "192.0.2.255 - - [06/May/2099:16:25:42 +0000] \"GET /favicon.ico HTTP/1.0\" 200 3638" } - -POST my-data-stream/_doc -{ - "@timestamp": "2099-05-06T16:21:15.000Z", - "message": "192.0.2.42 - - [06/May/2099:16:21:15 +0000] \"GET /images/bg.jpg HTTP/1.0\" 200 24736" -} ----- -// TEST[continued] -// end::ds-create-data-stream[] - -You can also manually create the stream using the -<>. The stream's name must -still match one of your template's index patterns. - -[source,console] ----- -PUT _data_stream/my-data-stream ----- -// TEST[continued] -// TEST[s/my-data-stream/my-data-stream-alt/] - -[discrete] -[[secure-data-stream]] -=== Secure the data stream - -include::{es-ref-dir}/security/authorization/alias-privileges.asciidoc[tag=data-stream-security] - -For an example, see <>. - -[discrete] -[[convert-index-alias-to-data-stream]] -=== Convert an index alias to a data stream - -// tag::time-series-alias-tip[] -Prior to {es} 7.9, you'd typically use an -<> -to manage time series data. Data streams replace this functionality, require -less maintenance, and automatically integrate with <>. -// end::time-series-alias-tip[] - -To convert an index alias with a write index to a data stream with the same -name, use the <>. -During conversion, the alias’s indices become hidden backing indices for the -stream. The alias’s write index becomes the stream’s write index. The stream -still requires a matching index template with data stream enabled. 
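
Before migrating, it can help to confirm which of the alias's indices is currently the write index, since that index becomes the stream's write index. An illustrative request for the alias used in this example:

[source,console]
----
GET _alias/my-time-series-data
----

The index whose alias entry has `"is_write_index": true` is the one that will become the data stream's write index after migration.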
- -//// -[source,console] ----- -POST idx1/_doc/ -{ - "message" : "testing", - "@timestamp" : "2099-01-01" -} - -POST idx2/_doc/ -{ - "message" : "testing2", - "@timestamp" : "2099-01-01" -} - -POST _aliases -{ - "actions": [ - { - "add": { - "index": "idx1", - "alias": "my-time-series-data", - "is_write_index": true - } - }, - { - "add": { - "index": "idx2", - "alias": "my-time-series-data" - } - } - ] -} - -PUT _index_template/template -{ - "index_patterns": ["my-time-series-data"], - "data_stream": { } -} ----- -// TEST[continued] -//// - -[source,console] ----- -POST _data_stream/_migrate/my-time-series-data ----- -// TEST[continued] - -[discrete] -[[get-info-about-data-stream]] -=== Get information about a data stream - -To get information about a data stream in {kib}, open the main menu and go to -*Stack Management > Index Management*. In the *Data Streams* view, click the -data stream's name. - -You can also use the <>. - -//// -[source,console] ----- -POST my-data-stream/_rollover/ ----- -// TEST[continued] -//// - -[source,console] ----- -GET _data_stream/my-data-stream ----- -// TEST[continued] - -[discrete] -[[delete-data-stream]] -=== Delete a data stream - -To delete a data stream and its backing indices in {kib}, open the main menu and -go to *Stack Management > Index Management*. In the *Data Streams* view, click -the trash icon. The icon only displays if you have the `delete_index` -<> for the data stream. - -You can also use the <>. - -[source,console] ----- -DELETE _data_stream/my-data-stream ----- -// TEST[continued] - -//// -[source,console] ----- -DELETE _data_stream/* -DELETE _index_template/* -DELETE _component_template/my-* -DELETE _ilm/policy/my-lifecycle-policy ----- -// TEST[continued] -//// diff --git a/docs/reference/data-streams/set-up-tsds.asciidoc b/docs/reference/data-streams/set-up-tsds.asciidoc deleted file mode 100644 index d082a9c4eebeb..0000000000000 --- a/docs/reference/data-streams/set-up-tsds.asciidoc +++ /dev/null @@ -1,304 +0,0 @@ -[[set-up-tsds]] -=== Set up a time series data stream (TSDS) -++++ -Set up a TSDS -++++ - -To set up a <>, follow these steps: - -. Check the <>. -. <>. -. <>. -. <>. -. <>. - -[discrete] -[[tsds-prereqs]] -==== Prerequisites - -* Before you create a TSDS, you should be familiar with <> and <>. - -* To follow this tutorial, you must have the following permissions: - -** <>: `manage_ilm` and -`manage_index_templates`. -** <>: `create_doc` and `create_index` -for any TSDS you create or convert. To roll over a TSDS, you must have the -`manage` privilege. - -[discrete] -[[tsds-ilm-policy]] -==== Create an index lifecycle policy - -While optional, we recommend using {ilm-init} to automate the management of your -TSDS's backing indices. {ilm-init} requires an index lifecycle policy. - -We recommend you specify a `max_age` criteria for the `rollover` action in the -policy. This ensures the <> for the -TSDS's backing indices are consistent. For example, setting a `max_age` of `1d` -for the `rollover` action ensures your backing indices consistently contain one -day's worth of data. 
- -//// -[source,console] ----- -PUT /_snapshot/found-snapshots -{ - "type": "fs", - "settings": { - "location": "my_backup_location" - } -} ----- -// TESTSETUP -//// - -[source,console] ----- -PUT _ilm/policy/my-weather-sensor-lifecycle-policy -{ - "policy": { - "phases": { - "hot": { - "actions": { - "rollover": { - "max_age": "1d", - "max_primary_shard_size": "50gb" - } - } - }, - "warm": { - "min_age": "30d", - "actions": { - "shrink": { - "number_of_shards": 1 - }, - "forcemerge": { - "max_num_segments": 1 - } - } - }, - "cold": { - "min_age": "60d", - "actions": { - "searchable_snapshot": { - "snapshot_repository": "found-snapshots" - } - } - }, - "frozen": { - "min_age": "90d", - "actions": { - "searchable_snapshot": { - "snapshot_repository": "found-snapshots" - } - } - }, - "delete": { - "min_age": "735d", - "actions": { - "delete": {} - } - } - } - } -} ----- - -[discrete] -[[create-tsds-index-template]] -==== Create an index template - -To setup a TSDS create an index template with the following details: - -* One or more index patterns that match the TSDS's name. We recommend -using our {fleet-guide}/data-streams.html#data-streams-naming-scheme[data stream -naming scheme]. - -* Enable data streams. - -* Specify a mapping that defines your dimensions and metrics: - -** One or more <> with a `time_series_dimension` value of `true`. - Alternatively, one or more <> fields configured as dimension containers, - provided that they will contain at least one sub-field (mapped statically or dynamically). - -** One or more <>, marked using the `time_series_metric` mapping parameter. - -** Optional: A `date` or `date_nanos` mapping for the `@timestamp` field. If you don’t specify a mapping, - Elasticsearch maps `@timestamp` as a `date` field with default options. - -* Define index settings: - -** Set `index.mode` setting to `time_series`. - -** Your lifecycle policy in the `index.lifecycle.name` index setting. - -** Optional: Other index settings, such as <>, - for your TSDS's backing indices. - -* A priority higher than `200` to avoid collisions with built-in templates. -See <>. - -* Optional: Component templates containing your mappings and other index settings. - -[source,console] ----- -PUT _index_template/my-weather-sensor-index-template -{ - "index_patterns": ["metrics-weather_sensors-*"], - "data_stream": { }, - "template": { - "settings": { - "index.mode": "time_series", - "index.lifecycle.name": "my-lifecycle-policy" - }, - "mappings": { - "properties": { - "sensor_id": { - "type": "keyword", - "time_series_dimension": true - }, - "location": { - "type": "keyword", - "time_series_dimension": true - }, - "temperature": { - "type": "half_float", - "time_series_metric": "gauge" - }, - "humidity": { - "type": "half_float", - "time_series_metric": "gauge" - }, - "@timestamp": { - "type": "date" - } - } - } - }, - "priority": 500, - "_meta": { - "description": "Template for my weather sensor data" - } -} ----- -// TEST[continued] - -//// -[source,console] ----- -DELETE _data_stream/* -DELETE _index_template/* -DELETE _ilm/policy/my-weather-sensor-lifecycle-policy ----- -// TEST[continued] -//// - -[discrete] -[[create-tsds]] -==== Create the TSDS - -<> add documents to a TSDS. -Documents in a TSDS must include: - -* A `@timestamp` field -* One or more dimension fields. At least one dimension must match the `index.routing_path` index setting, -if specified. 
If not specified explicitly, `index.routing_path` is set automatically to whichever mappings have -`time_series_dimension` set to `true`. - -To automatically create your TSDS, submit an indexing request that -targets the TSDS's name. This name must match one of your index template's -index patterns. - -IMPORTANT: To test the following example, update the timestamps to within three hours of -your current time. Data added to a TSDS must always fall within an -<>. - -[source,console] ----- -PUT metrics-weather_sensors-dev/_bulk -{ "create":{ } } -{ "@timestamp": "2099-05-06T16:21:15.000Z", "sensor_id": "HAL-000001", "location": "plains", "temperature": 26.7,"humidity": 49.9 } -{ "create":{ } } -{ "@timestamp": "2099-05-06T16:25:42.000Z", "sensor_id": "SYKENET-000001", "location": "swamp", "temperature": 32.4, "humidity": 88.9 } - -POST metrics-weather_sensors-dev/_doc -{ - "@timestamp": "2099-05-06T16:21:15.000Z", - "sensor_id": "SYKENET-000001", - "location": "swamp", - "temperature": 32.4, - "humidity": 88.9 -} ----- -// TEST[skip: The @timestamp value won't match an accepted range in the TSDS] - -You can also manually create the TSDS using the -<>. The TSDS's name must -still match one of your template's index patterns. - -[source,console] ----- -PUT _data_stream/metrics-weather_sensors-dev ----- -// TEST[setup:tsds_template] -// TEST[teardown:tsds_cleanup] - -[discrete] -[[secure-tsds]] -==== Secure the TSDS - -Use <> to control access to a TSDS. -Granting privileges on a TSDS grants the same privileges on its backing indices. - -For an example, refer to <>. - -[discrete] -[[convert-existing-data-stream-to-tsds]] -==== Convert an existing data stream to a TSDS - -You can also use the above steps to convert an existing regular data stream to -a TSDS. In this case, you'll want to: - -* Edit your existing index lifecycle policy, component templates, and index -templates instead of creating new ones. - -* Instead of creating the TSDS, manually roll over its write index. This ensures -the current write index and any new backing indices have an -<>. -+ -You can manually roll over the write index using the -<>. -+ -[source,console] ----- -POST metrics-weather_sensors-dev/_rollover ----- -// TEST[setup:tsds] -// TEST[teardown:tsds_cleanup] - -[discrete] -[[set-up-component-templates]] -==== A note about component templates and index.mode setting - -Configuring a TSDS via an index template that uses component templates is a bit more complicated. -Typically with component templates mappings and settings get scattered across multiple component templates. -If the `index.routing_path` is defined, the fields it references need to be defined in the same component -template with the `time_series_dimension` attribute enabled. - -The reasons for this is that each component template needs to be valid on its own. When configuring the -`index.mode` setting in an index template, the `index.routing_path` setting is configured automatically. -It is derived from the field mappings with `time_series_dimension` attribute enabled. - -[discrete] -[[set-up-tsds-whats-next]] -==== What's next? - -Now that you've set up your TSDS, you can manage and use it like a regular -data stream. 
For more information, refer to: - -* <> -* <> -* <> diff --git a/docs/reference/data-streams/tsds-index-settings.asciidoc b/docs/reference/data-streams/tsds-index-settings.asciidoc deleted file mode 100644 index 3ecfc60c90f58..0000000000000 --- a/docs/reference/data-streams/tsds-index-settings.asciidoc +++ /dev/null @@ -1,64 +0,0 @@ -[[tsds-index-settings]] -=== Time series index settings - -Backing indices in a <> support the -following index settings. - -[[index-mode]] -`index.mode`:: -(<<_static_index_settings,Static>>, string) Mode for the index. -Valid values are <> and `null` (no mode). -Defaults to `null`. - -[[index-time-series-start-time]] -`index.time_series.start_time`:: -(<<_static_index_settings,Static>>, string) Earliest `@timestamp` -value (inclusive) accepted by the index. Only indices with an `index.mode` of -<> support this setting. For more information, -refer to <>. - -[[index-time-series-end-time]] -`index.time_series.end_time`:: -(<>, string) Latest `@timestamp` -value (exclusive) accepted by the index. Only indices with an `index.mode` of -`time_series` support this setting. For more information, refer to -<>. - -[[index-look-ahead-time]] -`index.look_ahead_time`:: -(<<_static_index_settings,Static>>, <>) -Interval used to calculate the `index.time_series.end_time` for a TSDS's write -index. Defaults to `30m` (30 minutes). Accepts `1m` (one minute) to `2h` (two -hours). Only indices with an `index.mode` of `time_series` support this setting. -For more information, refer to <>. Additionally this setting -can not be less than `time_series.poll_interval` cluster setting. - -NOTE: Increasing the `look_ahead_time` will also increase the amount of time {ilm-cap} -waits before being able to proceed with executing the actions that expect the -index to not receive any writes anymore. For more information, refer to <>. - -[[index-look-back-time]] -`index.look_back_time`:: -(<<_static_index_settings,Static>>, <>) -Interval used to calculate the `index.time_series.start_time` for a TSDS's first -backing index when a tsdb data stream is created. Defaults to `2h` (2 hours). -Accepts `1m` (one minute) to `7d` (seven days). Only indices with an `index.mode` -of `time_series` support this setting. For more information, -refer to <>. - -[[index-routing-path]] `index.routing_path`:: -(<<_static_index_settings,Static>>, string or array of strings) Plain `keyword` -fields used to route documents in a TSDS to index shards. Supports wildcards -(`*`). Only indices with an `index.mode` of `time_series` support this setting. -Defaults to an empty list, except for data streams then defaults to the list -of <> with a `time_series_dimension` -value of `true` defined in your component and index templates. For more -information, refer to <>. - -[[index-mapping-dimension-fields-limit]] -// tag::dimensions-limit[] -`index.mapping.dimension_fields.limit`:: -(<>, integer) -Maximum number of <> for the -index. Defaults to `32768`. -// end::dimensions-limit[] diff --git a/docs/reference/data-streams/tsds-reindex.asciidoc b/docs/reference/data-streams/tsds-reindex.asciidoc deleted file mode 100644 index f4d00f33c179c..0000000000000 --- a/docs/reference/data-streams/tsds-reindex.asciidoc +++ /dev/null @@ -1,292 +0,0 @@ -[[tsds-reindex]] -=== Reindex a time series data stream (TSDS) - -++++ -Reindex a TSDS -++++ - -[discrete] -[[tsds-reindex-intro]] -==== Introduction - -With reindexing, you can copy documents from an old <> to a new one. Data streams support -reindexing in general, with a few <>. 
Still, time-series data streams -introduce additional challenges due to tight control on the accepted timestamp range for each backing index they -contain. Direct use of the reindex API would likely error out due to attempting to insert documents with timestamps that are -outside the current acceptance window. - -To avoid these limitations, use the process that is outlined below: - -. Create an index template for the destination data stream that will contain the re-indexed data. -. Update the template to -.. Set `index.time_series.start_time` and `index.time_series.end_time` index settings to -match the lowest and highest `@timestamp` values in the old data stream. -.. Set the `index.number_of_shards` index setting to the sum of all primary shards of all backing -indices of the old data stream. -.. Set `index.number_of_replicas` to zero and unset the `index.lifecycle.name` index setting. -. Run the reindex operation to completion. -. Revert the overriden index settings in the destination index template. -. Invoke the `rollover` api to create a new backing index that can receive new documents. - -NOTE: This process only applies to time-series data streams without <> configuration. Data -streams with downsampling can only be re-indexed by re-indexing their backing indexes individually and adding them to an -empty destination data stream. - -In what follows, we elaborate on each step of the process with examples. - -[discrete] -[[tsds-reindex-create-template]] -==== Create a TSDS template to accept old documents - -Consider a TSDS with the following template: - -[source,console] ----- -POST /_component_template/source_template -{ - "template": { - "settings": { - "index": { - "number_of_replicas": 2, - "number_of_shards": 2, - "mode": "time_series", - "routing_path": [ "metricset" ] - } - }, - "mappings": { - "properties": { - "@timestamp": { "type": "date" }, - "metricset": { - "type": "keyword", - "time_series_dimension": true - }, - "k8s": { - "properties": { - "tx": { "type": "long" }, - "rx": { "type": "long" } - } - } - } - } - } -} - -POST /_index_template/1 -{ - "index_patterns": [ - "k8s*" - ], - "composed_of": [ - "source_template" - ], - "data_stream": {} -} ----- -// TEST[skip: not expected to match the sample below] - -A possible output of `/k8s/_settings` looks like: - -[source,console-result] ----- - -{ - ".ds-k8s-2023.09.01-000002": { - "settings": { - "index": { - "mode": "time_series", - "routing": { - "allocation": { - "include": { - "_tier_preference": "data_hot" - } - } - }, - "hidden": "true", - "number_of_shards": "2", - "time_series": { - "end_time": "2023-09-01T14:00:00.000Z", - "start_time": "2023-09-01T10:00:00.000Z" - }, - "provided_name": ".ds-k9s-2023.09.01-000002", - "creation_date": "1694439857608", - "number_of_replicas": "2", - "routing_path": [ - "metricset" - ], - ... - } - } - }, - ".ds-k8s-2023.09.01-000001": { - "settings": { - "index": { - "mode": "time_series", - "routing": { - "allocation": { - "include": { - "_tier_preference": "data_hot" - } - } - }, - "hidden": "true", - "number_of_shards": "2", - "time_series": { - "end_time": "2023-09-01T10:00:00.000Z", - "start_time": "2023-09-01T06:00:00.000Z" - }, - "provided_name": ".ds-k9s-2023.09.01-000001", - "creation_date": "1694439837126", - "number_of_replicas": "2", - "routing_path": [ - "metricset" - ], - ... - } - } - } -} ----- -// NOTCONSOLE - -To reindex this TSDS, do not to re-use its index template in the destination data stream, to avoid impacting its -functionality. 
Instead, clone the template of the source TSDS and apply the following modifications: - -* Set `index.time_series.start_time` and `index.time_series.end_time` index settings explicitly. Their values should be -based on the lowest and highest `@timestamp` values in the data stream to reindex. This way, the initial backing index can -load all data that is contained in the source data stream. -* Set `index.number_of_shards` index setting to the sum of all primary shards of all backing indices of the source data -stream. This helps maintain the same level of search parallelism, as each shard is processed in a separate thread (or -more). -* Unset the `index.lifecycle.name` index setting, if any. This prevents ILM from modifying the destination data stream -during reindexing. -* (Optional) Set `index.number_of_replicas` to zero. This helps speed up the reindex operation. Since the data gets -copied, there is limited risk of data loss due to lack of replicas. - -Using the example above as source TSDS, the template for the destination TSDS would be: - -[source,console] ----- -POST /_component_template/destination_template -{ - "template": { - "settings": { - "index": { - "number_of_replicas": 0, - "number_of_shards": 4, - "mode": "time_series", - "routing_path": [ "metricset" ], - "time_series": { - "end_time": "2023-09-01T14:00:00.000Z", - "start_time": "2023-09-01T06:00:00.000Z" - } - } - }, - "mappings": { - "properties": { - "@timestamp": { "type": "date" }, - "metricset": { - "type": "keyword", - "time_series_dimension": true - }, - "k8s": { - "properties": { - "tx": { "type": "long" }, - "rx": { "type": "long" } - } - } - } - } - } -} - -POST /_index_template/2 -{ - "index_patterns": [ - "k9s*" - ], - "composed_of": [ - "destination_template" - ], - "data_stream": {} -} ----- -// TEST[continued] - -[discrete] -[[tsds-reindex-op]] -==== Reindex - -Invoke the reindex api, for instance: - -[source,console] ----- -POST /_reindex -{ - "source": { - "index": "k8s" - }, - "dest": { - "index": "k9s", - "op_type": "create" - } -} ----- -// TEST[continued] - -[discrete] -[[tsds-reindex-restore]] -==== Restore the destination index template - -Once the reindexing operation completes, restore the index template for the destination TSDS as follows: - -* Remove the overrides for `index.time_series.start_time` and `index.time_series.end_time`. -* Restore the values of `index.number_of_shards`, `index.number_of_replicas` and `index.lifecycle.name` as -applicable. - -Using the previous example, the destination template is modified as follows: - -[source,console] ----- -POST /_component_template/destination_template -{ - "template": { - "settings": { - "index": { - "number_of_replicas": 2, - "number_of_shards": 2, - "mode": "time_series", - "routing_path": [ "metricset" ] - } - }, - "mappings": { - "properties": { - "@timestamp": { "type": "date" }, - "metricset": { - "type": "keyword", - "time_series_dimension": true - }, - "k8s": { - "properties": { - "tx": { "type": "long" }, - "rx": { "type": "long" } - } - } - } - } - } -} ----- -// TEST[continued] - -Next, Invoke the `rollover` api on the destination data stream without any conditions set. - -[source,console] ----- -POST /k9s/_rollover/ ----- -// TEST[continued] - -This creates a new backing index with the updated index settings. The destination data stream is now ready to accept new documents. - -Note that the initial backing index can still accept documents within the range of timestamps derived from the source data -stream. 
If this is not desired, mark it as <> explicitly. diff --git a/docs/reference/data-streams/tsds.asciidoc b/docs/reference/data-streams/tsds.asciidoc deleted file mode 100644 index 1e1d56e5b4d93..0000000000000 --- a/docs/reference/data-streams/tsds.asciidoc +++ /dev/null @@ -1,343 +0,0 @@ -[[tsds]] -== Time series data stream (TSDS) - -A time series data stream (TSDS) models timestamped metrics data as one or -more time series. - -You can use a TSDS to store metrics data more efficiently. In our benchmarks, -metrics data stored in a TSDS used 70% less disk space than a regular data -stream. The exact impact will vary per data set. - -[discrete] -[[when-to-use-tsds]] -=== When to use a TSDS - -Both a <> and a TSDS can store timestamped -metrics data. Only use a TSDS if you typically add metrics data to {es} in near -real-time and `@timestamp` order. - -A TSDS is only intended for metrics data. For other timestamped data, such as -logs or traces, use a <> or regular data stream. - -[discrete] -[[differences-from-regular-data-stream]] -=== Differences from a regular data stream - -A TSDS works like a regular data stream with some key differences: - -* The matching index template for a TSDS requires a `data_stream` object with -the <> option. This option enables -most TSDS-related functionality. - -* In addition to a `@timestamp`, each document in a TSDS must contain one or -more <>. The matching index template for -a TSDS must contain mappings for at least one `keyword` dimension. -+ -TSDS documents also typically -contain one or more <>. - -* {es} generates a hidden <> metadata field for each document in a -TSDS. - -* A TSDS uses <> to store data -from the same time period in the same backing index. - -* The matching index template for a TSDS must contain the `index.routing_path` -index setting. A TSDS uses this setting to perform -<>. - -* A TSDS uses internal <> to order -shard segments by `_tsid` and `@timestamp`. - -* TSDS documents only support auto-generated document `_id` values. For TSDS -documents, the document `_id` is a hash of the document's dimensions and -`@timestamp`. A TSDS doesn't support custom document `_id` values. - - -* A TSDS uses <>, and as a result is -subject to some <> and <> applied to the `_source` field. - -NOTE: A time series index can contain fields other than dimensions or metrics. - -[discrete] -[[time-series]] -=== What is a time series? - -A time series is a sequence of observations for a specific entity. Together, -these observations let you track changes to the entity over time. For example, a -time series can track: - -* CPU and disk usage for a computer -* The price of a stock -* Temperature and humidity readings from a weather sensor. - -.Time series of weather sensor readings plotted as a graph -image::images/data-streams/time-series-chart.svg[align="center"] - -In a TSDS, each {es} document represents an observation, or data point, in a -specific time series. Although a TSDS can contain multiple time series, a -document can only belong to one time series. A time series can't span multiple -data streams. - -[discrete] -[[time-series-dimension]] -==== Dimensions - -Dimensions are field names and values that, in combination, identify a -document's time series. In most cases, a dimension describes some aspect of the -entity you're measuring. For example, documents related to the same weather -sensor may always have the same `sensor_id` and `location` values. 
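-
-For illustration, a minimal sketch of a mapping fragment that marks the `sensor_id` and
-`location` fields from the weather sensor example as dimensions (the `time_series_dimension`
-parameter is explained in more detail below):
-
-[source,js]
-----
-"mappings": {
-  "properties": {
-    "sensor_id": { "type": "keyword", "time_series_dimension": true },
-    "location":  { "type": "keyword", "time_series_dimension": true }
-  }
-}
-----
-// NOTCONSOLE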
- -A TSDS document is uniquely identified by its time series and timestamp, both of -which are used to generate the document `_id`. So, two documents with the same -dimensions and the same timestamp are considered to be duplicates. When you use -the `_bulk` endpoint to add documents to a TSDS, a second document with the same -timestamp and dimensions overwrites the first. When you use the -`PUT //_create/<_id>` format to add an individual document and a document -with the same `_id` already exists, an error is generated. - -You mark a field as a dimension using the boolean `time_series_dimension` -mapping parameter. The following field types support the `time_series_dimension` -parameter: - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -For a flattened field, use the `time_series_dimensions` parameter to configure an array of fields as dimensions. -For details refer to <>. - -Dimension definitions can be simplified through <> fields. - -[discrete] -[[time-series-metric]] -==== Metrics - -Metrics are fields that contain numeric measurements, as well as aggregations -and/or downsampling values based off of those measurements. While not required, -documents in a TSDS typically contain one or more metric fields. - -Metrics differ from dimensions in that while dimensions generally remain -constant, metrics are expected to change over time, even if rarely or slowly. - -To mark a field as a metric, you must specify a metric type using the -`time_series_metric` mapping parameter. The following field types support the -`time_series_metric` parameter: - -* <> -* <> -* All <> - -Accepted metric types vary based on the field type: - -.Valid values for `time_series_metric` -[%collapsible%open] -==== -// tag::time-series-metric-counter[] -`counter`:: A cumulative metric that only monotonically increases or resets to `0` (zero). For -example, a count of errors or completed tasks. -// end::time-series-metric-counter[] -+ -A counter field has additional semantic meaning, because it represents a cumulative counter. This works well with -the `rate` aggregation, since a rate can be derived from a cumulative monotonically increasing counter. However a number -of aggregations (for example `sum`) compute results that don't make sense for a counter field, because of its cumulative nature. -+ -Only numeric and `aggregate_metric_double` fields support the `counter` metric type. - -NOTE: Due to the cumulative nature of counter fields, the following aggregations are supported and expected to provide meaningful results with the `counter` field: `rate`, `histogram`, `range`, `min`, `max`, `top_metrics` and `variable_width_histogram`. In order to prevent issues with existing integrations and custom dashboards, we also allow the following aggregations, even if the result might be meaningless on counters: `avg`, `box plot`, `cardinality`, `extended stats`, `median absolute deviation`, `percentile ranks`, `percentiles`, `stats`, `sum` and `value count`. - -// tag::time-series-metric-gauge[] -`gauge`:: A metric that represents a single numeric that can arbitrarily increase or decrease. For example, a temperature or -available disk space. -// end::time-series-metric-gauge[] -+ -Only numeric and `aggregate_metric_double` fields support the `gauge` metric -type. - -// tag::time-series-metric-null[] -`null` (Default):: Not a time series metric. 
-// end::time-series-metric-null[] -==== - -[discrete] -[[time-series-mode]] -=== Time series mode - -The matching index template for a TSDS must contain a `data_stream` object with -the `index_mode: time_series` option. This option ensures the TSDS creates -backing indices with an <> setting of `time_series`. -This setting enables most TSDS-related functionality in the backing indices. - -If you convert an existing data stream to a TSDS, only backing indices created -after the conversion have an `index.mode` of `time_series`. You can't -change the `index.mode` of an existing backing index. - -[discrete] -[[tsid]] -==== `_tsid` metadata field - -When you add a document to a TSDS, {es} automatically generates a `_tsid` -metadata field for the document. The `_tsid` is an object containing the -document's dimensions. Documents in the same TSDS with the same `_tsid` are part -of the same time series. - -The `_tsid` field is not queryable or updatable. You also can't retrieve a -document's `_tsid` using a <> request. However, you can -use the `_tsid` field in aggregations and retrieve the `_tsid` value in searches -using the <>. - -WARNING: The format of the `_tsid` field shouldn't be relied upon. It may change -from version to version. - -[discrete] -[[time-bound-indices]] -==== Time-bound indices - -In a TSDS, each backing index, including the most recent backing index, has a -range of accepted `@timestamp` values. This range is defined by the -<> and -<> index settings. - -When you add a document to a TSDS, {es} adds the document to the appropriate -backing index based on its `@timestamp` value. As a result, a TSDS can add -documents to any TSDS backing index that can receive writes. This applies even -if the index isn't the most recent backing index. - -image::images/data-streams/time-bound-indices.svg[align="center"] - -TIP: Some {ilm-init} actions mark the source index as read-only, or expect the index -to not be actively written anymore in order to provide good performance. These actions are: -- <> -- <> -- <> -- <> -- <> -- <> -{ilm-cap} will **not** proceed with executing these actions until the upper time-bound -for accepting writes, represented by the <> -index setting, has lapsed. - -If no backing index can accept a document's `@timestamp` value, {es} rejects the -document. - - -{es} automatically configures `index.time_series.start_time` and -`index.time_series.end_time` settings as part of the index creation and rollover -process. - -[discrete] -[[tsds-look-ahead-time]] -==== Look-ahead time - -Use the <> index setting to -configure how far into the future you can add documents to an index. When you -create a new write index for a TSDS, {es} calculates the index's -`index.time_series.end_time` value as: - -`now + index.look_ahead_time` - -At the time series poll interval (controlled via `time_series.poll_interval` setting), -{es} checks if the write index has met the rollover criteria in its index -lifecycle policy. If not, {es} refreshes the `now` value and updates the write -index's `index.time_series.end_time` to: - -`now + index.look_ahead_time + time_series.poll_interval` - -This process continues until the write index rolls over. When the index rolls -over, {es} sets a final `index.time_series.end_time` value for the index. This -value borders the `index.time_series.start_time` for the new write index. This -ensures the `@timestamp` ranges for neighboring backing indices always border -but never overlap. 
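-
-One way to see the time boundaries that result from this process is to read the
-`index.time_series.*` settings of the data stream's backing indices. A sketch, assuming the
-`metrics-weather_sensors-dev` TSDS from the setup example exists:
-
-[source,console]
-----
-GET metrics-weather_sensors-dev/_settings?filter_path=*.settings.index.time_series
-----
-// TEST[skip: requires an existing TSDS]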
- -[discrete] -[[tsds-look-back-time]] -==== Look-back time - -Use the <> index setting to -configure how far in the past you can add documents to an index. When you -create a data stream for a TSDS, {es} calculates the index's -`index.time_series.start_time` value as: - -`now - index.look_back_time` - -This setting is only used when a data stream gets created and controls -the `index.time_series.start_time` index setting of the first backing index. -Configuring this index setting can be useful to accept documents with `@timestamp` -field values that are older than 2 hours (the `index.look_back_time` default). - -[discrete] -[[tsds-accepted-time-range]] -==== Accepted time range for adding data - -A TSDS is designed to ingest current metrics data. When the TSDS is first -created the initial backing index has: - -* an `index.time_series.start_time` value set to `now - index.look_back_time` -* an `index.time_series.end_time` value set to `now + index.look_ahead_time` - -Only data that falls inside that range can be indexed. - -You can use the <> to check the -accepted time range for writing to any TSDS. - -[discrete] -[[dimension-based-routing]] -==== Dimension-based routing - -Within each TSDS backing index, {es} uses the -<> index setting to route documents -with the same dimensions to the same shards. - -When you create the matching index template for a TSDS, you must specify one or -more dimensions in the `index.routing_path` setting. Each document in a TSDS -must contain one or more dimensions that match the `index.routing_path` setting. - -The `index.routing_path` setting accepts wildcard patterns (for example `dim.*`) -and can dynamically match new fields. However, {es} will reject any mapping -updates that add scripted, runtime, or non-dimension fields that -match the `index.routing_path` value. - -<> fields may be configured -as dimension containers. In this case, their sub-fields get included to the -routing path automatically. - -TSDS documents don't support a custom `_routing` value. Similarly, you can't -require a `_routing` value in mappings for a TSDS. - -[discrete] -[[tsds-index-sorting]] -==== Index sorting - -{es} uses <> to compress repeated values. -This compression works best when repeated values are stored near each other — in -the same index, on the same shard, and side-by-side in the same shard segment. - -Most time series data contains repeated values. Dimensions are repeated across -documents in the same time series. The metric values of a time series may also -change slowly over time. - -Internally, each TSDS backing index uses <> to order its shard segments by `_tsid` and `@timestamp`. This makes it -more likely that these repeated values are stored near each other for better -compression. A TSDS doesn't support any -<> index settings. - -[discrete] -[[tsds-whats-next]] -=== What's next? - -Now that you know the basics, you're ready to <> or -<>. 
- -include::set-up-tsds.asciidoc[] -include::tsds-index-settings.asciidoc[] -include::downsampling.asciidoc[] -include::downsampling-ilm.asciidoc[] -include::downsampling-manual.asciidoc[] -include::downsampling-dsl.asciidoc[] -include::tsds-reindex.asciidoc[] diff --git a/docs/reference/data-streams/use-a-data-stream.asciidoc b/docs/reference/data-streams/use-a-data-stream.asciidoc deleted file mode 100644 index 3167d768983a2..0000000000000 --- a/docs/reference/data-streams/use-a-data-stream.asciidoc +++ /dev/null @@ -1,375 +0,0 @@ -[role="xpack"] -[[use-a-data-stream]] -== Use a data stream - -After you <>, you can do -the following: - -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> -* <> - -//// -[source,console] ----- -PUT /_index_template/my-data-stream-template -{ - "index_patterns": [ "my-data-stream*" ], - "data_stream": { } -} - -PUT /_data_stream/my-data-stream - -POST /my-data-stream/_rollover/ - -POST /my-data-stream/_rollover/ - -PUT /my-data-stream/_create/bfspvnIBr7VVZlfp2lqX?refresh=wait_for -{ - "@timestamp": "2099-03-08T11:06:07.000Z", - "user": { - "id": "yWIumJd7" - }, - "message": "Login successful" -} ----- -// TESTSETUP - -[source,console] ----- -DELETE /_data_stream/my-data-stream* - -DELETE /_index_template/my-data-stream-template ----- -// TEARDOWN -//// - -[discrete] -[[add-documents-to-a-data-stream]] -=== Add documents to a data stream - -To add an individual document, use the <>. -<> are supported. - -[source,console] ----- -POST /my-data-stream/_doc/ -{ - "@timestamp": "2099-03-08T11:06:07.000Z", - "user": { - "id": "8a4f500d" - }, - "message": "Login successful" -} ----- - -You cannot add new documents to a data stream using the index API's `PUT -//_doc/<_id>` request format. To specify a document ID, use the `PUT -//_create/<_id>` format instead. Only an -<> of `create` is supported. - -To add multiple documents with a single request, use the <>. -Only `create` actions are supported. - -[source,console] ----- -PUT /my-data-stream/_bulk?refresh -{"create":{ }} -{ "@timestamp": "2099-03-08T11:04:05.000Z", "user": { "id": "vlb44hny" }, "message": "Login attempt failed" } -{"create":{ }} -{ "@timestamp": "2099-03-08T11:06:07.000Z", "user": { "id": "8a4f500d" }, "message": "Login successful" } -{"create":{ }} -{ "@timestamp": "2099-03-09T11:07:08.000Z", "user": { "id": "l7gk7f82" }, "message": "Logout successful" } ----- - -[discrete] -[[search-a-data-stream]] -=== Search a data stream - -The following search APIs support data streams: - -* <> -* <> -* <> -* <> -* <> - -[discrete] -[[get-stats-for-a-data-stream]] -=== Get statistics for a data stream - -Use the <> to get -statistics for one or more data streams: - -[source,console] ----- -GET /_data_stream/my-data-stream/_stats?human=true ----- - -[discrete] -[[manually-roll-over-a-data-stream]] -=== Manually roll over a data stream - -Use the <> to manually -<> a data stream. You have -two options when manually rolling over: - -1. To immediately trigger a rollover: -+ -[source,console] ----- -POST /my-data-stream/_rollover/ ----- -2. Or to postpone the rollover until the next indexing event occurs: -+ -[source,console] ----- -POST /my-data-stream/_rollover?lazy ----- -+ -Use the second to avoid having empty backing indices in data streams -that do not get updated often. - -[discrete] -[[open-closed-backing-indices]] -=== Open closed backing indices - -You cannot search a <> backing index, even by searching -its data stream. You also cannot <> -or <> documents in a closed index. 
- -To re-open a closed backing index, submit an <> directly to the index: - -[source,console] ----- -POST /.ds-my-data-stream-2099.03.07-000001/_open/ ----- -// TEST[setup:my_index] -// TEST[s/.ds-my-data-stream-2099.03.07-000001/my-index-000001/] - -To re-open all closed backing indices for a data stream, submit an open index -API request to the stream: - -[source,console] ----- -POST /my-data-stream/_open/ ----- - -[discrete] -[[reindex-with-a-data-stream]] -=== Reindex with a data stream - -Use the <> to copy documents from an existing index, -alias, or data stream to a data stream. Because data streams are -<>, a reindex into a data stream must use -an `op_type` of `create`. A reindex cannot update existing documents in a data -stream. - -//// -[source,console] ----- -PUT /_bulk?refresh=wait_for -{"create":{"_index" : "archive_1"}} -{ "@timestamp": "2099-03-08T11:04:05.000Z" } -{"create":{"_index" : "archive_2"}} -{ "@timestamp": "2099-03-08T11:06:07.000Z" } -{"create":{"_index" : "archive_2"}} -{ "@timestamp": "2099-03-09T11:07:08.000Z" } -{"create":{"_index" : "archive_2"}} -{ "@timestamp": "2099-03-09T11:07:08.000Z" } - -POST /_aliases -{ - "actions" : [ - { "add" : { "index" : "archive_1", "alias" : "archive" } }, - { "add" : { "index" : "archive_2", "alias" : "archive", "is_write_index" : true} } - ] -} ----- -//// - -[source,console] ----- -POST /_reindex -{ - "source": { - "index": "archive" - }, - "dest": { - "index": "my-data-stream", - "op_type": "create" - } -} ----- -// TEST[continued] - -[discrete] -[[update-docs-in-a-data-stream-by-query]] -=== Update documents in a data stream by query - -Use the <> to update documents in a -data stream that match a provided query: - -[source,console] ----- -POST /my-data-stream/_update_by_query -{ - "query": { - "match": { - "user.id": "l7gk7f82" - } - }, - "script": { - "source": "ctx._source.user.id = params.new_id", - "params": { - "new_id": "XgdX0NoX" - } - } -} ----- - -[discrete] -[[delete-docs-in-a-data-stream-by-query]] -=== Delete documents in a data stream by query - -Use the <> to delete documents in a -data stream that match a provided query: - -[source,console] ----- -POST /my-data-stream/_delete_by_query -{ - "query": { - "match": { - "user.id": "vlb44hny" - } - } -} ----- - -[discrete] -[[update-delete-docs-in-a-backing-index]] -=== Update or delete documents in a backing index - -If needed, you can update or delete documents in a data stream by sending -requests to the backing index containing the document. 
You'll need: - -* The <> -* The name of the backing index containing the document -* If updating the document, its <> - -To get this information, use a <>: - -[source,console] ----- -GET /my-data-stream/_search -{ - "seq_no_primary_term": true, - "query": { - "match": { - "user.id": "yWIumJd7" - } - } -} ----- - -Response: - -[source,console-result] ----- -{ - "took": 20, - "timed_out": false, - "_shards": { - "total": 3, - "successful": 3, - "skipped": 0, - "failed": 0 - }, - "hits": { - "total": { - "value": 1, - "relation": "eq" - }, - "max_score": 0.2876821, - "hits": [ - { - "_index": ".ds-my-data-stream-2099.03.08-000003", <1> - "_id": "bfspvnIBr7VVZlfp2lqX", <2> - "_seq_no": 0, <3> - "_primary_term": 1, <4> - "_score": 0.2876821, - "_source": { - "@timestamp": "2099-03-08T11:06:07.000Z", - "user": { - "id": "yWIumJd7" - }, - "message": "Login successful" - } - } - ] - } -} ----- -// TESTRESPONSE[s/"took": 20/"took": $body.took/] -// TESTRESPONSE[s/"max_score": 0.2876821/"max_score": $body.hits.max_score/] -// TESTRESPONSE[s/"_index": ".ds-my-data-stream-2099.03.08-000003"/"_index": $body.hits.hits.0._index/] -// TESTRESPONSE[s/"_score": 0.2876821/"_score": $body.hits.hits.0._score/] - -<1> Backing index containing the matching document -<2> Document ID for the document -<3> Current sequence number for the document -<4> Primary term for the document - -To update the document, use an <> request with valid -`if_seq_no` and `if_primary_term` arguments: - -[source,console] ----- -PUT /.ds-my-data-stream-2099-03-08-000003/_doc/bfspvnIBr7VVZlfp2lqX?if_seq_no=0&if_primary_term=1 -{ - "@timestamp": "2099-03-08T11:06:07.000Z", - "user": { - "id": "8a4f500d" - }, - "message": "Login successful" -} ----- -// TEST[setup:my_index] -// TEST[s/.ds-my-data-stream-2099.03.08-000003/my-index-000001/] -// TEST[s/bfspvnIBr7VVZlfp2lqX/1/] -// TEST[s/if_seq_no=0/if_seq_no=1/] - -To delete the document, use the <>: - -[source,console] ----- -DELETE /.ds-my-data-stream-2099.03.08-000003/_doc/bfspvnIBr7VVZlfp2lqX ----- -// TEST[setup:my_index] -// TEST[s/.ds-my-data-stream-2099.03.08-000003/my-index-000001/] -// TEST[s/bfspvnIBr7VVZlfp2lqX/1/] - -To delete or update multiple documents with a single request, use the -<>'s `delete`, `index`, and `update` actions. For `index` -actions, include valid <> arguments. - -[source,console] ----- -PUT /_bulk?refresh -{ "index": { "_index": ".ds-my-data-stream-2099.03.08-000003", "_id": "bfspvnIBr7VVZlfp2lqX", "if_seq_no": 0, "if_primary_term": 1 } } -{ "@timestamp": "2099-03-08T11:06:07.000Z", "user": { "id": "8a4f500d" }, "message": "Login successful" } ----- -// TEST[setup:my_index] -// TEST[s/.ds-my-data-stream-2099.03.08-000003/my-index-000001/] -// TEST[s/bfspvnIBr7VVZlfp2lqX/1/] - diff --git a/docs/reference/datatiers.asciidoc b/docs/reference/datatiers.asciidoc deleted file mode 100644 index 066765368ec5e..0000000000000 --- a/docs/reference/datatiers.asciidoc +++ /dev/null @@ -1,263 +0,0 @@ -[role="xpack"] -[[data-tiers]] -== Data tiers - -A _data tier_ is a collection of <> within a cluster that share the same -<>, and a hardware profile that's appropriately sized for the role. Elastic recommends that nodes in the same tier share the same -hardware profile to avoid <>. - -The data tiers that you use, and the way that you use them, depends on the data's <>. - -The following data tiers are can be used with each data category: - -Content data: - -* <> nodes handle the indexing and query load for non-timeseries -indices, such as a product catalog. 
- -Time series data: - -* <> nodes handle the indexing load for time series data, -such as logs or metrics. They hold your most recent, most-frequently-accessed data. -* <> nodes hold time series data that is accessed less-frequently -and rarely needs to be updated. -* <> nodes hold time series data that is accessed -infrequently and not normally updated. To save space, you can keep -<> of -<> on the cold tier. These fully mounted -indices eliminate the need for replicas, reducing required disk space by -approximately 50% compared to the regular indices. -* <> nodes hold time series data that is accessed -rarely and never updated. The frozen tier stores <> of <> exclusively. -This extends the storage capacity even further — by up to 20 times compared to -the warm tier. - -TIP: The performance of an {es} node is often limited by the performance of the underlying storage and hardware profile. -For example hardware profiles, refer to Elastic Cloud's {cloud}/ec-reference-hardware.html[instance configurations]. -Review our recommendations for optimizing your storage for <> and <>. - -IMPORTANT: {es} assumes nodes within a data tier share the same hardware profile (such as CPU, RAM, disk capacity). -Data tiers with unequally resourced nodes have a higher risk of <>. - -The way data tiers are used often depends on the data's category: - -- Content data remains on the <> for its entire -data lifecycle. - -- Time series data may progress through the -descending temperature data tiers (hot, warm, cold, and frozen) according to your -performance, resiliency, and data retention requirements. -+ -You can automate these lifecycle transitions using the <>, or custom <>. - -[discrete] -[[available-tier]] -=== Available data tiers - -Learn more about each data tier, including when and how it should be used. - -[discrete] -[[content-tier]] -==== Content tier - -// tag::content-tier[] -Data stored in the content tier is generally a collection of items such as a product catalog or article archive. -Unlike time series data, the value of the content remains relatively constant over time, -so it doesn't make sense to move it to a tier with different performance characteristics as it ages. -Content data typically has long data retention requirements, and you want to be able to retrieve -items quickly regardless of how old they are. - -Content tier nodes are usually optimized for query performance--they prioritize processing power over IO throughput -so they can process complex searches and aggregations and return results quickly. -While they are also responsible for indexing, content data is generally not ingested at as high a rate -as time series data such as logs and metrics. From a resiliency perspective the indices in this -tier should be configured to use one or more replicas. - -The content tier is required and is often deployed within the same node -grouping as the hot tier. System indices and other indices that aren't part -of a data stream are automatically allocated to the content tier. -// end::content-tier[] - -[discrete] -[[hot-tier]] -==== Hot tier - -// tag::hot-tier[] -The hot tier is the {es} entry point for time series data and holds your most-recent, -most-frequently-searched time series data. -Nodes in the hot tier need to be fast for both reads and writes, -which requires more hardware resources and faster storage (SSDs). -For resiliency, indices in the hot tier should be configured to use one or more replicas. - -The hot tier is required. 
New indices that are part of a <> are automatically allocated to the hot tier. -// end::hot-tier[] - -[discrete] -[[warm-tier]] -==== Warm tier - -// tag::warm-tier[] -Time series data can move to the warm tier once it is being queried less frequently -than the recently-indexed data in the hot tier. -The warm tier typically holds data from recent weeks. -Updates are still allowed, but likely infrequent. -Nodes in the warm tier generally don't need to be as fast as those in the hot tier. -For resiliency, indices in the warm tier should be configured to use one or more replicas. -// end::warm-tier[] - -[discrete] -[[cold-tier]] -==== Cold tier - -// tag::cold-tier[] -When you no longer need to search time series data regularly, it can move from -the warm tier to the cold tier. While still searchable, this tier is typically -optimized for lower storage costs rather than search speed. - -For better storage savings, you can keep <> -of <> on the cold tier. Unlike regular -indices, these fully mounted indices don't require replicas for reliability. In -the event of a failure, they can recover data from the underlying snapshot -instead. This potentially halves the local storage needed for the data. A -snapshot repository is required to use fully mounted indices in the cold tier. -Fully mounted indices are read-only. - -Alternatively, you can use the cold tier to store regular indices with replicas instead -of using {search-snaps}. This lets you store older data on less expensive hardware -but doesn't reduce required disk space compared to the warm tier. -// end::cold-tier[] - -[discrete] -[[frozen-tier]] -==== Frozen tier - -// tag::frozen-tier[] -Once data is no longer being queried, or being queried rarely, it may move from -the cold tier to the frozen tier where it stays for the rest of its life. - -The frozen tier requires a snapshot repository. -The frozen tier uses <> to store -and load data from a snapshot repository. This reduces local storage and -operating costs while still letting you search frozen data. Because {es} must -sometimes fetch frozen data from the snapshot repository, searches on the frozen -tier are typically slower than on the cold tier. -// end::frozen-tier[] - -[discrete] -[[configure-data-tiers]] -=== Configure data tiers - -Follow the instructions for your deployment type to configure data tiers. - -[discrete] -[[configure-data-tiers-cloud]] -==== {ess} or {ece} - -The default configuration for an {ecloud} deployment includes a shared tier for -hot and content data. This tier is required and can't be removed. - -To add a warm, cold, or frozen tier when you create a deployment: - -. On the **Create deployment** page, click **Advanced Settings**. - -. Click **+ Add capacity** for any data tiers to add. - -. Click **Create deployment** at the bottom of the page to save your changes. - -[role="screenshot"] -image::images/data-tiers/ess-advanced-config-data-tiers.png[{ecloud}'s deployment Advanced configuration page,align=center] - -To add a data tier to an existing deployment: - -. Log in to the {ess-console}[{ecloud} console]. - -. On the **Deployments** page, select your deployment. - -. In your deployment menu, select **Edit**. - -. Click **+ Add capacity** for any data tiers to add. - -. Click **Save** at the bottom of the page to save your changes. - - -To remove a data tier, refer to {cloud}/ec-disable-data-tier.html[Disable a data -tier]. 
- -[discrete] -[[configure-data-tiers-on-premise]] -==== Self-managed deployments - -For self-managed deployments, each node's <> is configured -in `elasticsearch.yml`. For example, the highest-performance nodes in a cluster -might be assigned to both the hot and content tiers: - -[source,yaml] ----- -node.roles: ["data_hot", "data_content"] ----- - -NOTE: We recommend you use <> in the frozen -tier. - -[discrete] -[[data-tier-allocation]] -=== Data tier index allocation - -The <> setting determines which tier the index should be allocated to. - -When you create an index, by default {es} sets the `_tier_preference` -to `data_content` to automatically allocate the index shards to the content tier. - -When {es} creates an index as part of a <>, -by default {es} sets the `_tier_preference` -to `data_hot` to automatically allocate the index shards to the hot tier. - -At the time of index creation, you can override the default setting by explicitly setting -the preferred value in one of two ways: - -- Using an <>. Refer to <> for details. -- Within the <> request body. - -You can override this -setting after index creation by <> to the preferred -value. - -This setting also accepts multiple tiers in order of preference. This prevents indices from remaining unallocated if no nodes are available in the preferred tier. For example, when {ilm} migrates an index to the cold phase, it sets the index `_tier_preference` to `data_cold,data_warm,data_hot`. - -To remove the data tier preference -setting, set the `_tier_preference` value to `null`. This allows the index to allocate to any data node within the cluster. Setting the `_tier_preference` to `null` does not restore the default value. Note that, in the case of managed indices, a <> action might apply a new value in its place. - -[discrete] -[[data-tier-allocation-value]] -==== Determine the current data tier preference - -You can check an existing index's data tier preference by <> for `index.routing.allocation.include._tier_preference`: - -[source,console] --------------------------------------------------- -GET /my-index-000001/_settings?filter_path=*.settings.index.routing.allocation.include._tier_preference --------------------------------------------------- -// TEST[setup:my_index] - -[discrete] -[[data-tier-allocation-troubleshooting]] -==== Troubleshooting - -The `_tier_preference` setting might conflict with other allocation settings. This conflict might prevent the shard from allocating. A conflict might occur when a cluster has not yet been completely <>. - -This setting will not unallocate a currently allocated shard, but might prevent it from migrating from its current location to its designated data tier. To troubleshoot, call the <> and specify the suspected problematic shard. - -[discrete] -[[data-tier-migration]] -==== Automatic data tier migration - -{ilm-init} automatically transitions managed -indices through the available data tiers using the <> action. -By default, this action is automatically injected in every phase. -You can explicitly specify the migrate action with `"enabled": false` to <>, -for example, if you're using the <> to manually -specify allocation rules. 
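-
-For instance, a sketch of a warm phase that disables the automatic `migrate` action in favor of a
-manual allocation rule (the policy name and the `rack_id` node attribute are placeholders):
-
-[source,console]
---------------------------------------------------
-PUT _ilm/policy/my-custom-allocation-policy
-{
-  "policy": {
-    "phases": {
-      "warm": {
-        "min_age": "30d",
-        "actions": {
-          "migrate": {
-            "enabled": false
-          },
-          "allocate": {
-            "include": {
-              "rack_id": "rack-one"
-            }
-          }
-        }
-      }
-    }
-  }
-}
---------------------------------------------------
-// TEST[skip: illustrative only]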
diff --git a/docs/reference/dependencies-versions.asciidoc b/docs/reference/dependencies-versions.asciidoc deleted file mode 100644 index 8c0d3e3c572bf..0000000000000 --- a/docs/reference/dependencies-versions.asciidoc +++ /dev/null @@ -1,10 +0,0 @@ -["appendix",id="dependencies-versions"] -= Dependencies and versions - -ifeval::["{release-state}"=="unreleased"] -See https://artifacts.elastic.co/reports/dependencies/dependencies-current.html[Elastic Stack Third-party Dependencices] for the complete list of dependencies for {es}. -endif::[] - -ifeval::["{release-state}"=="released"] -See https://artifacts.elastic.co/reports/dependencies/dependencies-{elasticsearch_version}.html[Elastic Stack Third-party Dependencices] for the complete list of dependencies for {es} {elasticsearch_version}. -endif::[] diff --git a/docs/reference/docs.asciidoc b/docs/reference/docs.asciidoc deleted file mode 100644 index ccdbaaffb2b77..0000000000000 --- a/docs/reference/docs.asciidoc +++ /dev/null @@ -1,49 +0,0 @@ -[[docs]] -== Document APIs - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-document[Document APIs]. --- - -This section describes the following CRUD APIs: - -.Single document APIs -* <> -* <> -* <> -* <> - -.Multi-document APIs -* <> -* <> -* <> -* <> -* <> - -include::docs/index_.asciidoc[] - -include::docs/get.asciidoc[] - -include::docs/delete.asciidoc[] - -include::docs/delete-by-query.asciidoc[] - -include::docs/update.asciidoc[] - -include::docs/update-by-query.asciidoc[] - -include::docs/multi-get.asciidoc[] - -include::docs/bulk.asciidoc[] - -include::docs/reindex.asciidoc[] - -include::docs/termvectors.asciidoc[] - -include::docs/multi-termvectors.asciidoc[] - -include::docs/refresh.asciidoc[] - -include::docs/concurrency-control.asciidoc[] diff --git a/docs/reference/docs/bulk.asciidoc b/docs/reference/docs/bulk.asciidoc deleted file mode 100644 index 78169e841dab4..0000000000000 --- a/docs/reference/docs/bulk.asciidoc +++ /dev/null @@ -1,799 +0,0 @@ -[[docs-bulk]] -=== Bulk API -++++ -Bulk -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-document[Document APIs]. --- - -Performs multiple indexing or delete operations in a single API call. -This reduces overhead and can greatly increase indexing speed. - -[source,console] --------------------------------------------------- -POST _bulk -{ "index" : { "_index" : "test", "_id" : "1" } } -{ "field1" : "value1" } -{ "delete" : { "_index" : "test", "_id" : "2" } } -{ "create" : { "_index" : "test", "_id" : "3" } } -{ "field1" : "value3" } -{ "update" : {"_id" : "1", "_index" : "test"} } -{ "doc" : {"field2" : "value2"} } --------------------------------------------------- - -[[docs-bulk-api-request]] -==== {api-request-title} - -`POST /_bulk` - -`POST //_bulk` - -[[docs-bulk-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the following -<> for the target data stream, index, -or index alias: - -** To use the `create` action, you must have the `create_doc`, `create`, -`index`, or `write` index privilege. Data streams support only the `create` -action. - -** To use the `index` action, you must have the `create`, `index`, or `write` -index privilege. - -** To use the `delete` action, you must have the `delete` or `write` index -privilege. - -** To use the `update` action, you must have the `index` or `write` index -privilege. 
- -** To automatically create a data stream or index with a bulk API request, you -must have the `auto_configure`, `create_index`, or `manage` index privilege. - -** To make the result of a bulk operation visible to search using the `refresh` -parameter, you must have the `maintenance` or `manage` index privilege. - -* Automatic data stream creation requires a matching index template with data -stream enabled. See <>. - -[[docs-bulk-api-desc]] -==== {api-description-title} - -Provides a way to perform multiple `index`, `create`, `delete`, and `update` actions in a single request. - -The actions are specified in the request body using a newline delimited JSON (NDJSON) structure: - -[source,js] --------------------------------------------------- -action_and_meta_data\n -optional_source\n -action_and_meta_data\n -optional_source\n -.... -action_and_meta_data\n -optional_source\n --------------------------------------------------- -// NOTCONSOLE - -The `index` and `create` actions expect a source on the next line, -and have the same semantics as the `op_type` parameter in the standard index API: -`create` fails if a document with the same ID already exists in the target, -`index` adds or replaces a document as necessary. - -NOTE: <> support only the `create` action. To update -or delete a document in a data stream, you must target the backing index -containing the document. See <>. - -`update` expects that the partial doc, upsert, -and script and its options are specified on the next line. - -`delete` does not expect a source on the next line and -has the same semantics as the standard delete API. - -[NOTE] -==== -The final line of data must end with a newline character `\n`. -Each newline character may be preceded by a carriage return `\r`. -When sending NDJSON data to the `_bulk` endpoint, use a `Content-Type` header of -`application/json` or `application/x-ndjson`. -==== - -Because this format uses literal `\n`'s as delimiters, -make sure that the JSON actions and sources are not pretty printed. - -If you provide a `` in the request path, -it is used for any actions that don't explicitly specify an `_index` argument. - -A note on the format: The idea here is to make processing of this as -fast as possible. As some of the actions are redirected to other -shards on other nodes, only `action_meta_data` is parsed on the -receiving node side. - -Client libraries using this protocol should try and strive to do -something similar on the client side, and reduce buffering as much as -possible. - -There is no "correct" number of actions to perform in a single bulk request. -Experiment with different settings to find the optimal size for your particular -workload. Note that {es} limits the maximum size of a HTTP request to `100mb` -by default so clients must ensure that no request exceeds this size. It is not -possible to index a single document which exceeds the size limit, so you must -pre-process any such documents into smaller pieces before sending them to {es}. -For instance, split documents into pages or chapters before indexing them, or -store raw binary data in a system outside {es} and replacing the raw data with -a link to the external system in the documents that you send to {es}. 
- -[discrete] -[[bulk-clients]] -===== Client support for bulk requests - -Some of the officially supported clients provide helpers to assist with -bulk requests and reindexing: - -Go:: - - See https://github.com/elastic/go-elasticsearch/tree/master/_examples/bulk#indexergo[esutil.BulkIndexer] - -Perl:: - - See https://metacpan.org/pod/Search::Elasticsearch::Client::5_0::Bulk[Search::Elasticsearch::Client::5_0::Bulk] - and https://metacpan.org/pod/Search::Elasticsearch::Client::5_0::Scroll[Search::Elasticsearch::Client::5_0::Scroll] - -Python:: - - See https://elasticsearch-py.readthedocs.io/en/latest/helpers.html[elasticsearch.helpers.*] - -JavaScript:: - - See {jsclient-current}/client-helpers.html[client.helpers.*] - -.NET:: - See https://www.elastic.co/guide/en/elasticsearch/client/net-api/current/indexing-documents.html[`BulkAllObservable`] - -PHP:: - See https://www.elastic.co/guide/en/elasticsearch/client/php-api/current/indexing_documents.html#_bulk_indexing[Bulk indexing] - -[discrete] -[[bulk-curl]] -===== Submitting bulk requests with cURL - -If you're providing text file input to `curl`, you *must* use the -`--data-binary` flag instead of plain `-d`. The latter doesn't preserve -newlines. Example: - -[source,js] --------------------------------------------------- -$ cat requests -{ "index" : { "_index" : "test", "_id" : "1" } } -{ "field1" : "value1" } -$ curl -s -H "Content-Type: application/x-ndjson" -XPOST localhost:9200/_bulk --data-binary "@requests"; echo -{"took":7, "errors": false, "items":[{"index":{"_index":"test","_id":"1","_version":1,"result":"created","forced_refresh":false}}]} --------------------------------------------------- -// NOTCONSOLE -// Not converting to console because this shows how curl works - -[discrete] -[[bulk-optimistic-concurrency-control]] -===== Optimistic concurrency control - -Each `index` and `delete` action within a bulk API call may include the -`if_seq_no` and `if_primary_term` parameters in their respective action -and meta data lines. The `if_seq_no` and `if_primary_term` parameters control -how operations are executed, based on the last modification to existing -documents. See <> for more details. - - -[discrete] -[[bulk-versioning]] -===== Versioning - -Each bulk item can include the version value using the -`version` field. It automatically follows the behavior of the -index / delete operation based on the `_version` mapping. It also -support the `version_type` (see <>). - -[discrete] -[[bulk-routing]] -===== Routing - -Each bulk item can include the routing value using the -`routing` field. It automatically follows the behavior of the -index / delete operation based on the `_routing` mapping. - -NOTE: Data streams do not support custom routing unless they were created with -the <> setting -enabled in the template. - -[discrete] -[[bulk-wait-for-active-shards]] -===== Wait for active shards - -When making bulk calls, you can set the `wait_for_active_shards` -parameter to require a minimum number of shard copies to be active -before starting to process the bulk request. See -<> for further details and a usage -example. - -[discrete] -[[bulk-refresh]] -===== Refresh - -Control when the changes made by this request are visible to search. See -<>. - -NOTE: Only the shards that receive the bulk request will be affected by -`refresh`. Imagine a `_bulk?refresh=wait_for` request with three -documents in it that happen to be routed to different shards in an index -with five shards. 
The request will only wait for those three shards to -refresh. The other two shards that make up the index do not -participate in the `_bulk` request at all. - -[discrete] -[[bulk-security]] -===== Security - -See <>. - -[[docs-bulk-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) -Name of the data stream, index, or index alias to perform bulk actions -on. - -[[docs-bulk-api-query-params]] -==== {api-query-parms-title} - -`list_executed_pipelines`:: -(Optional, Boolean) If `true`, the response will include the ingest pipelines that -were executed for each `index` or `create`. -Defaults to `false`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=pipeline] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=refresh] - -`require_alias`:: -(Optional, Boolean) If `true`, the request's actions must target an index alias. -Defaults to `false`. - -`require_data_stream`:: -(Optional, Boolean) If `true`, the request's actions must target a data stream (existing or to-be-created). -Defaults to `false`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=routing] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=source] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=source_excludes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=source_includes] - -`timeout`:: -+ --- -(Optional, <>) -Period each action waits for the following operations: - -* <> -* <> updates -* <> - -Defaults to `1m` (one minute). This guarantees {es} waits for at least the -timeout before failing. The actual wait time could be longer, particularly when -multiple waits occur. --- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=wait_for_active_shards] - -[[bulk-api-request-body]] -==== {api-request-body-title} -The request body contains a newline-delimited list of `create`, `delete`, `index`, -and `update` actions and their associated source data. - -`create`:: -(Optional, string) -Indexes the specified document if it does not already exist. -The following line must contain the source data to be indexed. -+ --- -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-index-ds] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-id] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-list-executed-pipelines] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-require-alias] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-dynamic-templates] --- - -`delete`:: -(Optional, string) -Removes the specified document from the index. -+ --- -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-index] - -`_id`:: -(Required, string) The document ID. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-require-alias] --- - -`index`:: -(Optional, string) -Indexes the specified document. -If the document exists, replaces the document and increments the version. -The following line must contain the source data to be indexed. -+ --- -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-index] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-id] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-list-executed-pipelines] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-require-alias] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-dynamic-templates] --- - -`update`:: -(Optional, string) -Performs a partial document update. -The following line must contain the partial document and update options. 
-+ --- -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-index] - -`_id`:: -(Required, string) The document ID. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bulk-require-alias] --- - -`doc`:: -(Optional, object) -The partial document to index. -Required for `update` operations. - -``:: -(Optional, object) -The document source to index. -Required for `create` and `index` operations. - -[role="child_attributes"] -[[bulk-api-response-body]] -==== {api-response-body-title} - -The bulk API's response contains the individual results of each operation in the -request, returned in the order submitted. The success or failure of an -individual operation does not affect other operations in the request. - -`took`:: -(integer) -How long, in milliseconds, it took to process the bulk request. - -`errors`:: -(Boolean) -If `true`, one or more of the operations in the bulk request did not complete -successfully. - -`items`:: -(array of objects) -Contains the result of each operation in the bulk request, in the order they -were submitted. -+ -.Properties of `items` objects -[%collapsible%open] -==== -:: -(object) -The parameter name is an action associated with the operation. Possible values -are `create`, `delete`, `index`, and `update`. -+ -The parameter value is an object that contains information for the associated -operation. -+ -.Properties of `` -[%collapsible%open] -===== -`_index`:: -(string) -Name of the index associated with the operation. If the operation targeted a -data stream, this is the backing index into which the document was written. - -`_id`:: -(integer) -The document ID associated with the operation. - -`_version`:: -(integer) -The document version associated with the operation. The document version is -incremented each time the document is updated. -+ -This parameter is only returned for successful actions. - -`result`:: -(string) -Result of the operation. Successful values are `created`, `deleted`, and -`updated`. Other valid values are `noop` and `not_found`. - -`_shards`:: -(object) -Contains shard information for the operation. -+ -This parameter is only returned for successful operations. -+ -.Properties of `_shards` -[%collapsible%open] -====== -`total`:: -(integer) -Number of shards the operation attempted to execute on. - -`successful`:: -(integer) -Number of shards the operation succeeded on. - -`failed`:: -(integer) -Number of shards the operation attempted to execute on but failed. -====== - -`_seq_no`:: -(integer) -The sequence number assigned to the document for the operation. -Sequence numbers are used to ensure an older version of a document -doesn’t overwrite a newer version. See <>. -+ -This parameter is only returned for successful operations. - -`_primary_term`:: -(integer) -The primary term assigned to the document for the operation. -See <>. -+ -This parameter is only returned for successful operations. - -`status`:: -(integer) -HTTP status code returned for the operation. - -`error`:: -(object) -Contains additional information about the failed operation. -+ -The parameter is only returned for failed operations. -+ -.Properties of `error` -[%collapsible%open] -====== -`type`:: -(string) -Error type for the operation. - -`reason`:: -(string) -Reason for the failed operation. - -`index_uuid`:: -(string) -The universally unique identifier (UUID) of the index associated with the failed -operation. - -`shard`:: -(string) -ID of the shard associated with the failed operation. 
- -`index`:: -(string) -Name of the index associated with the failed operation. If the operation -targeted a data stream, this is the backing index into which the document was -attempted to be written. -====== -===== -==== - -[[docs-bulk-api-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -POST _bulk -{ "index" : { "_index" : "test", "_id" : "1" } } -{ "field1" : "value1" } -{ "delete" : { "_index" : "test", "_id" : "2" } } -{ "create" : { "_index" : "test", "_id" : "3" } } -{ "field1" : "value3" } -{ "update" : {"_id" : "1", "_index" : "test"} } -{ "doc" : {"field2" : "value2"} } --------------------------------------------------- - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "took": 30, - "errors": false, - "items": [ - { - "index": { - "_index": "test", - "_id": "1", - "_version": 1, - "result": "created", - "_shards": { - "total": 2, - "successful": 1, - "failed": 0 - }, - "status": 201, - "_seq_no" : 0, - "_primary_term": 1 - } - }, - { - "delete": { - "_index": "test", - "_id": "2", - "_version": 1, - "result": "not_found", - "_shards": { - "total": 2, - "successful": 1, - "failed": 0 - }, - "status": 404, - "_seq_no" : 1, - "_primary_term" : 2 - } - }, - { - "create": { - "_index": "test", - "_id": "3", - "_version": 1, - "result": "created", - "_shards": { - "total": 2, - "successful": 1, - "failed": 0 - }, - "status": 201, - "_seq_no" : 2, - "_primary_term" : 3 - } - }, - { - "update": { - "_index": "test", - "_id": "1", - "_version": 2, - "result": "updated", - "_shards": { - "total": 2, - "successful": 1, - "failed": 0 - }, - "status": 200, - "_seq_no" : 3, - "_primary_term" : 4 - } - } - ] -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 30/"took": $body.took/] -// TESTRESPONSE[s/"index_uuid": .../"index_uuid": $body.items.3.update.error.index_uuid/] -// TESTRESPONSE[s/"_seq_no" : 0/"_seq_no" : $body.items.0.index._seq_no/] -// TESTRESPONSE[s/"_primary_term" : 1/"_primary_term" : $body.items.0.index._primary_term/] -// TESTRESPONSE[s/"_seq_no" : 1/"_seq_no" : $body.items.1.delete._seq_no/] -// TESTRESPONSE[s/"_primary_term" : 2/"_primary_term" : $body.items.1.delete._primary_term/] -// TESTRESPONSE[s/"_seq_no" : 2/"_seq_no" : $body.items.2.create._seq_no/] -// TESTRESPONSE[s/"_primary_term" : 3/"_primary_term" : $body.items.2.create._primary_term/] -// TESTRESPONSE[s/"_seq_no" : 3/"_seq_no" : $body.items.3.update._seq_no/] -// TESTRESPONSE[s/"_primary_term" : 4/"_primary_term" : $body.items.3.update._primary_term/] - -[discrete] -[[bulk-update]] -===== Bulk update example - -When using the `update` action, `retry_on_conflict` can be used as a field in -the action itself (not in the extra payload line), to specify how many -times an update should be retried in the case of a version conflict. - -The `update` action payload supports the following options: `doc` -(partial document), `upsert`, `doc_as_upsert`, `script`, `params` (for -script), `lang` (for script), and `_source`. See update documentation for details on -the options. 
Example with update actions: - -[source,console] --------------------------------------------------- -POST _bulk -{ "update" : {"_id" : "1", "_index" : "index1", "retry_on_conflict" : 3} } -{ "doc" : {"field" : "value"} } -{ "update" : { "_id" : "0", "_index" : "index1", "retry_on_conflict" : 3} } -{ "script" : { "source": "ctx._source.counter += params.param1", "lang" : "painless", "params" : {"param1" : 1}}, "upsert" : {"counter" : 1}} -{ "update" : {"_id" : "2", "_index" : "index1", "retry_on_conflict" : 3} } -{ "doc" : {"field" : "value"}, "doc_as_upsert" : true } -{ "update" : {"_id" : "3", "_index" : "index1", "_source" : true} } -{ "doc" : {"field" : "value"} } -{ "update" : {"_id" : "4", "_index" : "index1"} } -{ "doc" : {"field" : "value"}, "_source": true} --------------------------------------------------- - -[discrete] -[[bulk-failures-ex]] -===== Example with failed actions - -The following bulk API request includes operations that update non-existent -documents. - -[source,console] ----- -POST /_bulk -{ "update": {"_id": "5", "_index": "index1"} } -{ "doc": {"my_field": "foo"} } -{ "update": {"_id": "6", "_index": "index1"} } -{ "doc": {"my_field": "foo"} } -{ "create": {"_id": "7", "_index": "index1"} } -{ "my_field": "foo" } ----- - -Because these operations cannot complete successfully, the API returns a -response with an `errors` flag of `true`. - -The response also includes an `error` object for any failed operations. The -`error` object contains additional information about the failure, such as the -error type and reason. - -[source,console-result] ----- -{ - "took": 486, - "errors": true, - "items": [ - { - "update": { - "_index": "index1", - "_id": "5", - "status": 404, - "error": { - "type": "document_missing_exception", - "reason": "[5]: document missing", - "index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA", - "shard": "0", - "index": "index1" - } - } - }, - { - "update": { - "_index": "index1", - "_id": "6", - "status": 404, - "error": { - "type": "document_missing_exception", - "reason": "[6]: document missing", - "index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA", - "shard": "0", - "index": "index1" - } - } - }, - { - "create": { - "_index": "index1", - "_id": "7", - "_version": 1, - "result": "created", - "_shards": { - "total": 2, - "successful": 1, - "failed": 0 - }, - "_seq_no": 0, - "_primary_term": 1, - "status": 201 - } - } - ] -} ----- -// TESTRESPONSE[s/"took": 486/"took": $body.took/] -// TESTRESPONSE[s/"_seq_no": 0/"_seq_no": $body.items.2.create._seq_no/] -// TESTRESPONSE[s/"index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA"/"index_uuid": $body.$_path/] - -To return only information about failed operations, use the -<> query parameter with an -argument of `items.*.error`. - -[source,console] ----- -POST /_bulk?filter_path=items.*.error -{ "update": {"_id": "5", "_index": "index1"} } -{ "doc": {"my_field": "baz"} } -{ "update": {"_id": "6", "_index": "index1"} } -{ "doc": {"my_field": "baz"} } -{ "update": {"_id": "7", "_index": "index1"} } -{ "doc": {"my_field": "baz"} } ----- -// TEST[continued] - -The API returns the following result. 
- -[source,console-result] ----- -{ - "items": [ - { - "update": { - "error": { - "type": "document_missing_exception", - "reason": "[5]: document missing", - "index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA", - "shard": "0", - "index": "index1" - } - } - }, - { - "update": { - "error": { - "type": "document_missing_exception", - "reason": "[6]: document missing", - "index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA", - "shard": "0", - "index": "index1" - } - } - } - ] -} ----- -// TESTRESPONSE[s/"index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA"/"index_uuid": $body.$_path/] - - -[discrete] -[[bulk-dynamic-templates]] -===== Example with dynamic templates parameter - -The below example creates a dynamic template, then performs a bulk request -consisting of index/create requests with the `dynamic_templates` parameter. - -[source,console] ----- -PUT my-index/ -{ - "mappings": { - "dynamic_templates": [ - { - "geo_point": { - "mapping": { - "type" : "geo_point" - } - } - } - ] - } -} - -POST /_bulk -{ "index" : { "_index" : "my_index", "_id" : "1", "dynamic_templates": {"work_location": "geo_point"}} } -{ "field" : "value1", "work_location": "41.12,-71.34", "raw_location": "41.12,-71.34"} -{ "create" : { "_index" : "my_index", "_id" : "2", "dynamic_templates": {"home_location": "geo_point"}} } -{ "field" : "value2", "home_location": "41.12,-71.34"} ----- - -The bulk request creates two new fields `work_location` and `home_location` with type `geo_point` according -to the `dynamic_templates` parameter; however, the `raw_location` field is created using default dynamic mapping -rules, as a `text` field in that case since it is supplied as a string in the JSON document. diff --git a/docs/reference/docs/concurrency-control.asciidoc b/docs/reference/docs/concurrency-control.asciidoc deleted file mode 100644 index 49e18d4838b8f..0000000000000 --- a/docs/reference/docs/concurrency-control.asciidoc +++ /dev/null @@ -1,111 +0,0 @@ -[[optimistic-concurrency-control]] -=== Optimistic concurrency control - -Elasticsearch is distributed. When documents are created, updated, or deleted, -the new version of the document has to be replicated to other nodes in the cluster. -Elasticsearch is also asynchronous and concurrent, meaning that these replication -requests are sent in parallel, and may arrive at their destination out of sequence. -Elasticsearch needs a way of ensuring that an older version of a document never -overwrites a newer version. - - -To ensure an older version of a document doesn't overwrite a newer version, every -operation performed to a document is assigned a sequence number by the primary -shard that coordinates that change. The sequence number is increased with each -operation and thus newer operations are guaranteed to have a higher sequence -number than older operations. Elasticsearch can then use the sequence number of -operations to make sure a newer document version is never overridden by -a change that has a smaller sequence number assigned to it. 
- -For example, the following indexing command will create a document and assign it -an initial sequence number and primary term: - -[source,console] --------------------------------------------------- -PUT products/_doc/1567 -{ - "product" : "r2d2", - "details" : "A resourceful astromech droid" -} --------------------------------------------------- - -You can see the assigned sequence number and primary term in the -`_seq_no` and `_primary_term` fields of the response: - -[source,console-result] --------------------------------------------------- -{ - "_shards": { - "total": 2, - "failed": 0, - "successful": 1 - }, - "_index": "products", - "_id": "1567", - "_version": 1, - "_seq_no": 362, - "_primary_term": 2, - "result": "created" -} --------------------------------------------------- -// TESTRESPONSE[s/"_seq_no": 362/"_seq_no": $body._seq_no/] -// TESTRESPONSE[s/"_primary_term": 2/"_primary_term": $body._primary_term/] - - -Elasticsearch keeps tracks of the sequence number and primary term of the last -operation to have changed each of the documents it stores. The sequence number -and primary term are returned in the `_seq_no` and `_primary_term` fields in -the response of the <>: - -[source,console] --------------------------------------------------- -GET products/_doc/1567 --------------------------------------------------- -// TEST[continued] - -returns: - -[source,console-result] --------------------------------------------------- -{ - "_index": "products", - "_id": "1567", - "_version": 1, - "_seq_no": 362, - "_primary_term": 2, - "found": true, - "_source": { - "product": "r2d2", - "details": "A resourceful astromech droid" - } -} --------------------------------------------------- -// TESTRESPONSE[s/"_seq_no": 362/"_seq_no": $body._seq_no/] -// TESTRESPONSE[s/"_primary_term": 2/"_primary_term": $body._primary_term/] - - -Note: The <> can return the `_seq_no` and `_primary_term` -for each search hit by setting <>. - -The sequence number and the primary term uniquely identify a change. By noting down -the sequence number and primary term returned, you can make sure to only change the -document if no other change was made to it since you retrieved it. This -is done by setting the `if_seq_no` and `if_primary_term` parameters of the -<>, <>, or <>. - -For example, the following indexing call will make sure to add a tag to the -document without losing any potential change to the description or an addition -of another tag by another API: - -[source,console] --------------------------------------------------- -PUT products/_doc/1567?if_seq_no=362&if_primary_term=2 -{ - "product": "r2d2", - "details": "A resourceful astromech droid", - "tags": [ "droid" ] -} --------------------------------------------------- -// TEST[continued] -// TEST[catch: conflict] diff --git a/docs/reference/docs/data-replication.asciidoc b/docs/reference/docs/data-replication.asciidoc deleted file mode 100644 index 6ee266070e727..0000000000000 --- a/docs/reference/docs/data-replication.asciidoc +++ /dev/null @@ -1,174 +0,0 @@ - -[[docs-replication]] -=== Reading and writing documents - -[discrete] -==== Introduction - -Each index in Elasticsearch is <> -and each shard can have multiple copies. These copies are known as a _replication group_ and must be kept in sync when documents -are added or removed. If we fail to do so, reading from one copy will result in very different results than reading from another. 
-The process of keeping the shard copies in sync and serving reads from them is what we call the _data replication model_. - -Elasticsearch’s data replication model is based on the _primary-backup model_ and is described very well in the -https://www.microsoft.com/en-us/research/publication/pacifica-replication-in-log-based-distributed-storage-systems/[PacificA paper] of -Microsoft Research. That model is based on having a single copy from the replication group that acts as the primary shard. -The other copies are called _replica shards_. The primary serves as the main entry point for all indexing operations. It is in charge of -validating them and making sure they are correct. Once an index operation has been accepted by the primary, the primary is also -responsible for replicating the operation to the other copies. - -This purpose of this section is to give a high level overview of the Elasticsearch replication model and discuss the implications -it has for various interactions between write and read operations. - -[discrete] -[[basic-write-model]] -==== Basic write model - -Every indexing operation in Elasticsearch is first resolved to a replication group using <>, -typically based on the document ID. Once the replication group has been determined, the operation is forwarded -internally to the current _primary shard_ of the group. This stage of indexing is referred to as the _coordinating stage_. - -image::images/data_processing_flow.png[An example of a basic write model.] - -The next stage of indexing is the _primary stage_, performed on the primary shard. The primary shard is responsible -for validating the operation and forwarding it to the other replicas. Since replicas can be offline, the primary -is not required to replicate to all replicas. Instead, Elasticsearch maintains a list of shard copies that should -receive the operation. This list is called the _in-sync copies_ and is maintained by the master node. As the name implies, -these are the set of "good" shard copies that are guaranteed to have processed all of the index and delete operations that -have been acknowledged to the user. The primary is responsible for maintaining this invariant and thus has to replicate all -operations to each copy in this set. - -The primary shard follows this basic flow: - -. Validate incoming operation and reject it if structurally invalid (Example: have an object field where a number is expected) -. Execute the operation locally i.e. indexing or deleting the relevant document. This will also validate the content of fields - and reject if needed (Example: a keyword value is too long for indexing in Lucene). -. Forward the operation to each replica in the current in-sync copies set. If there are multiple replicas, this is done in parallel. -. Once all in-sync replicas have successfully performed the operation and responded to the primary, the primary acknowledges the successful - completion of the request to the client. - -Each in-sync replica copy performs the indexing operation locally so that it has a copy. This stage of indexing is the -_replica stage_. - -These indexing stages (coordinating, primary, and replica) are sequential. To enable internal retries, the lifetime of each stage -encompasses the lifetime of each subsequent stage. For example, the coordinating stage is not complete until each primary -stage, which may be spread out across different primary shards, has completed. 
Each primary stage will not complete until the -in-sync replicas have finished indexing the docs locally and responded to the replica requests. - -[discrete] -===== Failure handling - -Many things can go wrong during indexing -- disks can get corrupted, nodes can be disconnected from each other, or some -configuration mistake could cause an operation to fail on a replica despite it being successful on the primary. These -are infrequent but the primary has to respond to them. - -In the case that the primary itself fails, the node hosting the primary will send a message to the master about it. The indexing -operation will wait (up to 1 minute, by <>) for the master to promote one of the replicas to be a -new primary. The operation will then be forwarded to the new primary for processing. Note that the master also monitors the -health of the nodes and may decide to proactively demote a primary. This typically happens when the node holding the primary -is isolated from the cluster by a networking issue. See <> for more details. - -Once the operation has been successfully performed on the primary, the primary has to deal with potential failures -when executing it on the replica shards. This may be caused by an actual failure on the replica or due to a network -issue preventing the operation from reaching the replica (or preventing the replica from responding). All of these -share the same end result: a replica which is part of the in-sync replica set misses an operation that is about to -be acknowledged. In order to avoid violating the invariant, the primary sends a message to the master requesting -that the problematic shard be removed from the in-sync replica set. Only once removal of the shard has been acknowledged -by the master does the primary acknowledge the operation. Note that the master will also instruct another node to start -building a new shard copy in order to restore the system to a healthy state. - -[[demoted-primary]] -While forwarding an operation to the replicas, the primary will use the replicas to validate that it is still the -active primary. If the primary has been isolated due to a network partition (or a long GC) it may continue to process -incoming indexing operations before realising that it has been demoted. Operations that come from a stale primary -will be rejected by the replicas. When the primary receives a response from the replica rejecting its request because -it is no longer the primary then it will reach out to the master and will learn that it has been replaced. The -operation is then routed to the new primary. - -.What happens if there are no replicas? -************ -This is a valid scenario that can happen due to index configuration or simply -because all the replicas have failed. In that case the primary is processing operations without any external validation, -which may seem problematic. On the other hand, the primary cannot fail other shards on its own but request the master to do -so on its behalf. This means that the master knows that the primary is the only single good copy. We are therefore guaranteed -that the master will not promote any other (out-of-date) shard copy to be a new primary and that any operation indexed -into the primary will not be lost. Of course, since at that point we are running with only single copy of the data, physical hardware -issues can cause data loss. See <> for some mitigation options. 
-************ - -[discrete] -==== Basic read model - -Reads in Elasticsearch can be very lightweight lookups by ID or a heavy search request with complex aggregations that -take non-trivial CPU power. One of the beauties of the primary-backup model is that it keeps all shard copies identical -(with the exception of in-flight operations). As such, a single in-sync copy is sufficient to serve read requests. - -When a read request is received by a node, that node is responsible for forwarding it to the nodes that hold the relevant shards, -collating the responses, and responding to the client. We call that node the _coordinating node_ for that request. The basic flow -is as follows: - -. Resolve the read requests to the relevant shards. Note that since most searches will be sent to one or more indices, - they typically need to read from multiple shards, each representing a different subset of the data. -. Select an active copy of each relevant shard, from the shard replication group. This can be either the primary or - a replica. By default, {es} uses <> to select the shard copies. -. Send shard level read requests to the selected copies. -. Combine the results and respond. Note that in the case of get by ID look up, only one shard is relevant and this step can be skipped. - -[discrete] -[[shard-failures]] -===== Shard failures - -When a shard fails to respond to a read request, the coordinating node sends the -request to another shard copy in the same replication group. Repeated failures -can result in no available shard copies. - -To ensure fast responses, the following APIs will -respond with partial results if one or more shards fail: - -* <> -* <> -* <> - -Responses containing partial results still provide a `200 OK` HTTP status code. -Shard failures are indicated by the `timed_out` and `_shards` fields of -the response header. - -[discrete] -==== A few simple implications - -Each of these basic flows determines how Elasticsearch behaves as a system for both reads and writes. Furthermore, since read -and write requests can be executed concurrently, these two basic flows interact with each other. This has a few inherent implications: - -Efficient reads:: Under normal operation each read operation is performed once for each relevant replication group. - Only under failure conditions do multiple copies of the same shard execute the same search. - -Read unacknowledged:: Since the primary first indexes locally and then replicates the request, it is possible for a - concurrent read to already see the change before it has been acknowledged. - -Two copies by default:: This model can be fault tolerant while maintaining only two copies of the data. This is in contrast to - quorum-based system where the minimum number of copies for fault tolerance is 3. - -[discrete] -==== Failures - -Under failures, the following is possible: - -A single shard can slow down indexing:: Because the primary waits for all replicas in the in-sync copies set during each operation, - a single slow shard can slow down the entire replication group. This is the price we pay for the read efficiency mentioned above. - Of course a single slow shard will also slow down unlucky searches that have been routed to it. - -Dirty reads:: An isolated primary can expose writes that will not be acknowledged. This is caused by the fact that an isolated - primary will only realize that it is isolated once it sends requests to its replicas or when reaching out to the master. 
- At that point the operation is already indexed into the primary and can be read by a concurrent read. Elasticsearch mitigates - this risk by pinging the master every second (by default) and rejecting indexing operations if no master is known. - -[discrete] -==== The Tip of the Iceberg - -This document provides a high level overview of how Elasticsearch deals with data. Of course, there is much more -going on under the hood. Things like primary terms, cluster state publishing, and master election all play a role in -keeping this system behaving correctly. This document also doesn't cover known and important -bugs (both closed and open). We recognize that https://github.com/elastic/elasticsearch/issues?q=label%3Aresiliency[GitHub is hard to keep up with]. -To help people stay on top of those, we maintain a dedicated https://www.elastic.co/guide/en/elasticsearch/resiliency/current/index.html[resiliency page] -on our website. We strongly advise reading it. diff --git a/docs/reference/docs/delete-by-query.asciidoc b/docs/reference/docs/delete-by-query.asciidoc deleted file mode 100644 index 8cde1da91121a..0000000000000 --- a/docs/reference/docs/delete-by-query.asciidoc +++ /dev/null @@ -1,702 +0,0 @@ -[[docs-delete-by-query]] -=== Delete by query API -++++ -Delete by query -++++ - -Deletes documents that match the specified query. - -[source,console] --------------------------------------------------- -POST /my-index-000001/_delete_by_query -{ - "query": { - "match": { - "user.id": "elkbee" - } - } -} --------------------------------------------------- -// TEST[setup:my_index_big] - -//// - -[source,console-result] --------------------------------------------------- -{ - "took" : 147, - "timed_out": false, - "deleted": 119, - "batches": 1, - "version_conflicts": 0, - "noops": 0, - "retries": { - "bulk": 0, - "search": 0 - }, - "throttled_millis": 0, - "requests_per_second": -1.0, - "throttled_until_millis": 0, - "total": 119, - "failures" : [ ] -} --------------------------------------------------- -// TESTRESPONSE[s/"took" : 147/"took" : "$body.took"/] -//// - -[[docs-delete-by-query-api-request]] -==== {api-request-title} - -`POST //_delete_by_query` - -[[docs-delete-by-query-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the following -<> for the target data stream, index, -or alias: - -** `read` -** `delete` or `write` - -[[docs-delete-by-query-api-desc]] -==== {api-description-title} - -You can specify the query criteria in the request URI or the request body -using the same syntax as the <>. - -When you submit a delete by query request, {es} gets a snapshot of the data stream or index -when it begins processing the request and deletes matching documents using -`internal` versioning. If a document changes between the time that the -snapshot is taken and the delete operation is processed, it results in a version -conflict and the delete operation fails. - -NOTE: Documents with a version equal to 0 cannot be deleted using delete by -query because `internal` versioning does not support 0 as a valid -version number. - -While processing a delete by query request, {es} performs multiple search -requests sequentially to find all of the matching documents to delete. A bulk -delete request is performed for each batch of matching documents. If a -search or bulk request is rejected, the requests are retried up to 10 times, with -exponential back off. 
If the maximum retry limit is reached, processing halts -and all failed requests are returned in the response. Any delete requests that -completed successfully still stick, they are not rolled back. - -You can opt to count version conflicts instead of halting and returning by -setting `conflicts` to `proceed`. Note that if you opt to count version conflicts -the operation could attempt to delete more documents from the source -than `max_docs` until it has successfully deleted `max_docs` documents, or it has gone through -every document in the source query. - -===== Refreshing shards - -Specifying the `refresh` parameter refreshes all shards involved in the delete -by query once the request completes. This is different than the delete API's -`refresh` parameter, which causes just the shard that received the delete -request to be refreshed. Unlike the delete API, it does not support -`wait_for`. - -[[docs-delete-by-query-task-api]] -===== Running delete by query asynchronously - -If the request contains `wait_for_completion=false`, {es} -performs some preflight checks, launches the request, and returns a -<> you can use to cancel or get the status of the task. {es} creates a -record of this task as a document at `.tasks/task/${taskId}`. When you are -done with a task, you should delete the task document so {es} can reclaim the -space. - -===== Waiting for active shards - -`wait_for_active_shards` controls how many copies of a shard must be active -before proceeding with the request. See <> -for details. `timeout` controls how long each write request waits for unavailable -shards to become available. Both work exactly the way they work in the -<>. Delete by query uses scrolled searches, so you can also -specify the `scroll` parameter to control how long it keeps the search context -alive, for example `?scroll=10m`. The default is 5 minutes. - -[[docs-delete-by-query-throttle]] -===== Throttling delete requests - -To control the rate at which delete by query issues batches of delete operations, -you can set `requests_per_second` to any positive decimal number. This pads each -batch with a wait time to throttle the rate. Set `requests_per_second` to `-1` -to disable throttling. - -Throttling uses a wait time between batches so that the internal scroll requests -can be given a timeout that takes the request padding into account. The padding -time is the difference between the batch size divided by the -`requests_per_second` and the time spent writing. By default the batch size is -`1000`, so if `requests_per_second` is set to `500`: - -[source,txt] --------------------------------------------------- -target_time = 1000 / 500 per second = 2 seconds -wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds --------------------------------------------------- - -Since the batch is issued as a single `_bulk` request, large batch sizes -cause {es} to create many requests and wait before starting the next set. -This is "bursty" instead of "smooth". - -[[docs-delete-by-query-slice]] -===== Slicing - -Delete by query supports <> to parallelize the -delete process. This can improve efficiency and provide a -convenient way to break the request down into smaller parts. - -Setting `slices` to `auto` chooses a reasonable number for most data streams and indices. -If you're slicing manually or otherwise tuning automatic slicing, keep in mind -that: - -* Query performance is most efficient when the number of `slices` is equal to -the number of shards in the index or backing index. 
If that number is large (for example, -500), choose a lower number as too many `slices` hurts performance. Setting -`slices` higher than the number of shards generally does not improve efficiency -and adds overhead. - -* Delete performance scales linearly across available resources with the -number of slices. - -Whether query or delete performance dominates the runtime depends on the -documents being reindexed and cluster resources. - -[[docs-delete-by-query-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Comma-separated list of data streams, indices, and aliases to -search. Supports wildcards (`*`). To search all data streams or indices, omit -this parameter or use `* or `_all`. - -[[docs-delete-by-query-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=allow-no-indices] -+ -Defaults to `true`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=analyzer] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=analyze_wildcard] - -`conflicts`:: - (Optional, string) What to do if delete by query hits version conflicts: - `abort` or `proceed`. Defaults to `abort`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=default_operator] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=df] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=expand-wildcards] -+ -Defaults to `open`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=index-ignore-unavailable] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=lenient] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=max_docs] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=preference] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=search-q] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=request_cache] - -`refresh`:: -(Optional, Boolean) If `true`, {es} refreshes all shards involved in the -delete by query after the request completes. Defaults to `false`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=requests_per_second] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=routing] - -`scroll`:: -(Optional, <>) -Period to retain the <> for scrolling. See -<>. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=scroll_size] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=search_type] - -`search_timeout`:: -(Optional, <>) -Explicit timeout for each search request. -Defaults to no timeout. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=slices] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=sort] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=stats] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=terminate_after] - -`timeout`:: -(Optional, <>) -Period each deletion request <>. Defaults to `1m` (one minute). - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=version] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=wait_for_active_shards] - -[[docs-delete-by-query-api-request-body]] -==== {api-request-body-title} - -`query`:: - (Optional, <>) Specifies the documents to delete - using the <>. 
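-
-For instance, the same match criteria shown in the request body at the top of
-this page can also be passed in the request URI through the `q` parameter
-instead of a `query` object. This is only an illustrative sketch; the index
-name and field are placeholders:
-
-[source,console]
--------------------------------------------------
-POST /my-index-000001/_delete_by_query?q=user.id:elkbee
--------------------------------------------------
-// TEST[skip:illustrative sketch]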
- - -[[docs-delete-by-query-api-response-body]] -==== Response body - -////////////////////////// - -[source,console] --------------------------------------------------- -POST /my-index-000001/_delete_by_query -{ - "query": { <1> - "match": { - "user.id": "elkbee" - } - } -} --------------------------------------------------- -// TEST[setup:my_index_big] - -////////////////////////// - -The JSON response looks like this: - -[source,console-result] --------------------------------------------------- -{ - "took" : 147, - "timed_out": false, - "total": 119, - "deleted": 119, - "batches": 1, - "version_conflicts": 0, - "noops": 0, - "retries": { - "bulk": 0, - "search": 0 - }, - "throttled_millis": 0, - "requests_per_second": -1.0, - "throttled_until_millis": 0, - "failures" : [ ] -} --------------------------------------------------- -// TESTRESPONSE[s/: [0-9]+/: $body.$_path/] - -`took`:: - -The number of milliseconds from start to end of the whole operation. - -`timed_out`:: - -This flag is set to `true` if any of the requests executed during the -delete by query execution has timed out. - -`total`:: - -The number of documents that were successfully processed. - -`deleted`:: - -The number of documents that were successfully deleted. - -`batches`:: - -The number of scroll responses pulled back by the delete by query. - -`version_conflicts`:: - -The number of version conflicts that the delete by query hit. - -`noops`:: - -This field is always equal to zero for delete by query. It only exists -so that delete by query, update by query, and reindex APIs return responses - with the same structure. - -`retries`:: - -The number of retries attempted by delete by query. `bulk` is the number -of bulk actions retried, and `search` is the number of search actions retried. - -`throttled_millis`:: - -Number of milliseconds the request slept to conform to `requests_per_second`. - -`requests_per_second`:: - -The number of requests per second effectively executed during the delete by query. - -`throttled_until_millis`:: - -This field should always be equal to zero in a `_delete_by_query` response. It only -has meaning when using the <>, where it -indicates the next time (in milliseconds since epoch) a throttled request will be -executed again in order to conform to `requests_per_second`. - -`failures`:: - -Array of failures if there were any unrecoverable errors during the process. If -this is non-empty then the request aborted because of those failures. -Delete by query is implemented using batches, and any failure causes the entire -process to abort but all failures in the current batch are collected into the -array. You can use the `conflicts` option to prevent reindex from aborting on -version conflicts. 
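-
-Because the full response can be verbose, it can be trimmed to just the fields
-described above with the standard `filter_path` option. The following is an
-illustrative sketch only; the index name is a placeholder:
-
-[source,console]
--------------------------------------------------
-POST /my-index-000001/_delete_by_query?conflicts=proceed&filter_path=took,deleted,version_conflicts,failures
-{
-  "query": {
-    "match_all": {}
-  }
-}
--------------------------------------------------
-// TEST[skip:illustrative sketch]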
- -[[docs-delete-by-query-api-example]] -==== {api-examples-title} - -Delete all documents from the `my-index-000001` data stream or index: - -[source,console] --------------------------------------------------- -POST my-index-000001/_delete_by_query?conflicts=proceed -{ - "query": { - "match_all": {} - } -} --------------------------------------------------- -// TEST[setup:my_index] - -Delete documents from multiple data streams or indices: - -[source,console] --------------------------------------------------- -POST /my-index-000001,my-index-000002/_delete_by_query -{ - "query": { - "match_all": {} - } -} --------------------------------------------------- -// TEST[s/^/PUT my-index-000001\nPUT my-index-000002\n/] - -Limit the delete by query operation to shards that a particular routing -value: - -[source,console] --------------------------------------------------- -POST my-index-000001/_delete_by_query?routing=1 -{ - "query": { - "range" : { - "age" : { - "gte" : 10 - } - } - } -} --------------------------------------------------- -// TEST[setup:my_index] - -By default `_delete_by_query` uses scroll batches of 1000. You can change the -batch size with the `scroll_size` URL parameter: - -[source,console] --------------------------------------------------- -POST my-index-000001/_delete_by_query?scroll_size=5000 -{ - "query": { - "term": { - "user.id": "kimchy" - } - } -} --------------------------------------------------- -// TEST[setup:my_index] - -Delete a document using a unique attribute: - -[source,console] --------------------------------------------------- -POST my-index-000001/_delete_by_query -{ - "query": { - "term": { - "user.id": "kimchy" - } - }, - "max_docs": 1 -} --------------------------------------------------- -// TEST[setup:my_index] - -[discrete] -[[docs-delete-by-query-manual-slice]] -===== Slice manually - -Slice a delete by query manually by providing a slice id and total number of -slices: - -[source,console] ----------------------------------------------------------------- -POST my-index-000001/_delete_by_query -{ - "slice": { - "id": 0, - "max": 2 - }, - "query": { - "range": { - "http.response.bytes": { - "lt": 2000000 - } - } - } -} -POST my-index-000001/_delete_by_query -{ - "slice": { - "id": 1, - "max": 2 - }, - "query": { - "range": { - "http.response.bytes": { - "lt": 2000000 - } - } - } -} ----------------------------------------------------------------- -// TEST[setup:my_index_big] - -Which you can verify works with: - -[source,console] ----------------------------------------------------------------- -GET _refresh -POST my-index-000001/_search?size=0&filter_path=hits.total -{ - "query": { - "range": { - "http.response.bytes": { - "lt": 2000000 - } - } - } -} ----------------------------------------------------------------- -// TEST[continued] - -Which results in a sensible `total` like this one: - -[source,console-result] ----------------------------------------------------------------- -{ - "hits": { - "total" : { - "value": 0, - "relation": "eq" - } - } -} ----------------------------------------------------------------- - -[discrete] -[[docs-delete-by-query-automatic-slice]] -===== Use automatic slicing - -You can also let delete-by-query automatically parallelize using -<> to slice on `_id`. 
Use `slices` to specify -the number of slices to use: - -[source,console] ----------------------------------------------------------------- -POST my-index-000001/_delete_by_query?refresh&slices=5 -{ - "query": { - "range": { - "http.response.bytes": { - "lt": 2000000 - } - } - } -} ----------------------------------------------------------------- -// TEST[setup:my_index_big] - -Which you also can verify works with: - -[source,console] ----------------------------------------------------------------- -POST my-index-000001/_search?size=0&filter_path=hits.total -{ - "query": { - "range": { - "http.response.bytes": { - "lt": 2000000 - } - } - } -} ----------------------------------------------------------------- -// TEST[continued] - -Which results in a sensible `total` like this one: - -[source,console-result] ----------------------------------------------------------------- -{ - "hits": { - "total" : { - "value": 0, - "relation": "eq" - } - } -} ----------------------------------------------------------------- - -Setting `slices` to `auto` will let {es} choose the number of slices -to use. This setting will use one slice per shard, up to a certain limit. If -there are multiple source data streams or indices, it will choose the number of slices based -on the index or backing index with the smallest number of shards. - -Adding `slices` to `_delete_by_query` just automates the manual process used in -the section above, creating sub-requests which means it has some quirks: - -* You can see these requests in the -<>. These sub-requests are "child" -tasks of the task for the request with `slices`. -* Fetching the status of the task for the request with `slices` only contains -the status of completed slices. -* These sub-requests are individually addressable for things like cancellation -and rethrottling. -* Rethrottling the request with `slices` will rethrottle the unfinished -sub-request proportionally. -* Canceling the request with `slices` will cancel each sub-request. -* Due to the nature of `slices` each sub-request won't get a perfectly even -portion of the documents. All documents will be addressed, but some slices may -be larger than others. Expect larger slices to have a more even distribution. -* Parameters like `requests_per_second` and `max_docs` on a request with -`slices` are distributed proportionally to each sub-request. Combine that with -the point above about distribution being uneven and you should conclude that -using `max_docs` with `slices` might not result in exactly `max_docs` documents -being deleted. -* Each sub-request gets a slightly different snapshot of the source data stream or index -though these are all taken at approximately the same time. - -[discrete] -[[docs-delete-by-query-rethrottle]] -===== Change throttling for a request - -The value of `requests_per_second` can be changed on a running delete by query -using the `_rethrottle` API. Rethrottling that speeds up the -query takes effect immediately but rethrotting that slows down the query -takes effect after completing the current batch to prevent scroll -timeouts. - -[source,console] --------------------------------------------------- -POST _delete_by_query/r1A2WoRbTwKZ516z6NEs5A:36619/_rethrottle?requests_per_second=-1 --------------------------------------------------- - -Use the <> to get the task ID. Set `requests_per_second` -to any positive decimal value or `-1` to disable throttling. 
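-
-For example, to slow the same task down to roughly 500 requests per second
-instead of disabling throttling entirely (the task ID below is the
-illustrative one used throughout this page):
-
-[source,console]
--------------------------------------------------
-POST _delete_by_query/r1A2WoRbTwKZ516z6NEs5A:36619/_rethrottle?requests_per_second=500
--------------------------------------------------
-// TEST[skip:illustrative sketch]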
- -===== Get the status of a delete by query operation - -Use the <> to get the status of a delete by query -operation: - - -[source,console] --------------------------------------------------- -GET _tasks?detailed=true&actions=*/delete/byquery --------------------------------------------------- -// TEST[skip:No tasks to retrieve] - -The response looks like: - -[source,console-result] --------------------------------------------------- -{ - "nodes" : { - "r1A2WoRbTwKZ516z6NEs5A" : { - "name" : "r1A2WoR", - "transport_address" : "127.0.0.1:9300", - "host" : "127.0.0.1", - "ip" : "127.0.0.1:9300", - "attributes" : { - "testattr" : "test", - "portsfile" : "true" - }, - "tasks" : { - "r1A2WoRbTwKZ516z6NEs5A:36619" : { - "node" : "r1A2WoRbTwKZ516z6NEs5A", - "id" : 36619, - "type" : "transport", - "action" : "indices:data/write/delete/byquery", - "status" : { <1> - "total" : 6154, - "updated" : 0, - "created" : 0, - "deleted" : 3500, - "batches" : 36, - "version_conflicts" : 0, - "noops" : 0, - "retries": 0, - "throttled_millis": 0 - }, - "description" : "" - } - } - } - } -} --------------------------------------------------- - -<1> This object contains the actual status. It is just like the response JSON -with the important addition of the `total` field. `total` is the total number -of operations that the reindex expects to perform. You can estimate the -progress by adding the `updated`, `created`, and `deleted` fields. The request -will finish when their sum is equal to the `total` field. - -With the task id you can look up the task directly: - -[source,console] --------------------------------------------------- -GET /_tasks/r1A2WoRbTwKZ516z6NEs5A:36619 --------------------------------------------------- -// TEST[catch:missing] - -The advantage of this API is that it integrates with `wait_for_completion=false` -to transparently return the status of completed tasks. If the task is completed -and `wait_for_completion=false` was set on it then it'll come back with -`results` or an `error` field. The cost of this feature is the document that -`wait_for_completion=false` creates at `.tasks/task/${taskId}`. It is up to -you to delete that document. - - -[discrete] -[[docs-delete-by-query-cancel-task-api]] -===== Cancel a delete by query operation - -Any delete by query can be canceled using the <>: - -[source,console] --------------------------------------------------- -POST _tasks/r1A2WoRbTwKZ516z6NEs5A:36619/_cancel --------------------------------------------------- - -The task ID can be found using the <>. - -Cancellation should happen quickly but might take a few seconds. The task status -API above will continue to list the delete by query task until this task checks that it -has been cancelled and terminates itself. diff --git a/docs/reference/docs/delete.asciidoc b/docs/reference/docs/delete.asciidoc deleted file mode 100644 index 046a20abdaffb..0000000000000 --- a/docs/reference/docs/delete.asciidoc +++ /dev/null @@ -1,211 +0,0 @@ -[[docs-delete]] -=== Delete API -++++ -Delete -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-document[Document APIs]. --- - -Removes a JSON document from the specified index. - -[[docs-delete-api-request]] -==== {api-request-title} - -`DELETE //_doc/<_id>` - -[[docs-delete-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `delete` or -`write` <> for the target index or -index alias. 
- -[[docs-delete-api-desc]] -==== {api-description-title} - -You use DELETE to remove a document from an index. You must specify the -index name and document ID. - -NOTE: You cannot send deletion requests directly to a data stream. To delete a -document in a data stream, you must target the backing index containing the -document. See <>. - -[discrete] -[[optimistic-concurrency-control-delete]] -===== Optimistic concurrency control - -Delete operations can be made conditional and only be performed if the last -modification to the document was assigned the sequence number and primary -term specified by the `if_seq_no` and `if_primary_term` parameters. If a -mismatch is detected, the operation will result in a `VersionConflictException` -and a status code of 409. See <> for more details. - -[discrete] -[[delete-versioning]] -===== Versioning - -Each document indexed is versioned. When deleting a document, the `version` can -be specified to make sure the relevant document we are trying to delete is -actually being deleted and it has not changed in the meantime. Every write -operation executed on a document, deletes included, causes its version to be -incremented. The version number of a deleted document remains available for a -short time after deletion to allow for control of concurrent operations. The -length of time for which a deleted document's version remains available is -determined by the `index.gc_deletes` index setting and defaults to 60 seconds. - -[discrete] -[[delete-routing]] -===== Routing - -If routing is used during indexing, the routing value also needs to be -specified to delete a document. - -If the `_routing` mapping is set to `required` and no routing value is -specified, the delete API throws a `RoutingMissingException` and rejects -the request. - -For example: - - -//// -Example to delete with routing - -[source,console] --------------------------------------------------- -PUT /my-index-000001/_doc/1?routing=shard-1 -{ - "test": "test" -} --------------------------------------------------- -//// - - -[source,console] --------------------------------------------------- -DELETE /my-index-000001/_doc/1?routing=shard-1 --------------------------------------------------- -// TEST[continued] - -This request deletes the document with id `1`, but it is routed based on the -user. The document is not deleted if the correct routing is not specified. - -[discrete] -[[delete-index-creation]] -===== Automatic index creation - -If an <> is used, -the delete operation automatically creates the specified index if it does not -exist. For information about manually creating indices, see -<>. - -[discrete] -[[delete-distributed]] -===== Distributed - -The delete operation gets hashed into a specific shard id. It then gets -redirected into the primary shard within that id group, and replicated -(if needed) to shard replicas within that id group. - -[discrete] -[[delete-wait-for-active-shards]] -===== Wait for active shards - -When making delete requests, you can set the `wait_for_active_shards` -parameter to require a minimum number of shard copies to be active -before starting to process the delete request. See -<> for further details and a usage -example. - -[discrete] -[[delete-refresh]] -===== Refresh - -Control when the changes made by this request are visible to search. See -<>. - -[discrete] -[[delete-timeout]] -===== Timeout - -The primary shard assigned to perform the delete operation might not be -available when the delete operation is executed. 
Some reasons for this -might be that the primary shard is currently recovering from a store -or undergoing relocation. By default, the delete operation will wait on -the primary shard to become available for up to 1 minute before failing -and responding with an error. The `timeout` parameter can be used to -explicitly specify how long it waits. Here is an example of setting it -to 5 minutes: - -[source,console] --------------------------------------------------- -DELETE /my-index-000001/_doc/1?timeout=5m --------------------------------------------------- -// TEST[setup:my_index] - -[[docs-delete-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) Name of the target index. - -`<_id>`:: -(Required, string) Unique identifier for the document. - -[[docs-delete-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=if_seq_no] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=if_primary_term] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=refresh] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=routing] - -`timeout`:: -(Optional, <>) -Period to <>. Defaults to -`1m` (one minute). - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=doc-version] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=version_type] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=wait_for_active_shards] - -[[docs-delete-api-example]] -==== {api-examples-title} - -Delete the JSON document `1` from the `my-index-000001` index: - -[source,console] --------------------------------------------------- -DELETE /my-index-000001/_doc/1 --------------------------------------------------- -// TEST[setup:my_index] - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "_shards": { - "total": 2, - "failed": 0, - "successful": 2 - }, - "_index": "my-index-000001", - "_id": "1", - "_version": 2, - "_primary_term": 1, - "_seq_no": 5, - "result": "deleted" -} --------------------------------------------------- -// TESTRESPONSE[s/"successful": 2/"successful": 1/] -// TESTRESPONSE[s/"_primary_term": 1/"_primary_term": $body._primary_term/] -// TESTRESPONSE[s/"_seq_no": 5/"_seq_no": $body._seq_no/] diff --git a/docs/reference/docs/get.asciidoc b/docs/reference/docs/get.asciidoc deleted file mode 100644 index a3ff70fb95f6e..0000000000000 --- a/docs/reference/docs/get.asciidoc +++ /dev/null @@ -1,428 +0,0 @@ -[[docs-get]] -=== Get API -++++ -Get -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-document[Document APIs]. --- - -Retrieves the specified JSON document from an index. - -[source,console] --------------------------------------------------- -GET my-index-000001/_doc/0 --------------------------------------------------- -// TEST[setup:my_index] - -[[docs-get-api-request]] -==== {api-request-title} - -`GET /_doc/<_id>` - -`HEAD /_doc/<_id>` - -`GET /_source/<_id>` - -`HEAD /_source/<_id>` - -[[docs-get-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `read` -<> for the target index or index alias. - -[[docs-get-api-desc]] -==== {api-description-title} -You use GET to retrieve a document and its source or stored fields from a -particular index. Use HEAD to verify that a document exists. You can -use the `_source` resource retrieve just the document source or verify -that it exists. 
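-
-As a quick sketch of these three forms (full examples follow later on this
-page), using the same illustrative index and document ID as the rest of this
-section:
-
-[source,console]
--------------------------------------------------
-GET my-index-000001/_doc/0
-
-HEAD my-index-000001/_doc/0
-
-GET my-index-000001/_source/0
--------------------------------------------------
-// TEST[skip:illustrative sketch]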
- -[discrete] -[[realtime]] -===== Realtime - -By default, the get API is realtime, and is not affected by the refresh -rate of the index (when data will become visible for search). In case where -stored fields are requested (see `stored_fields` parameter) and the document -has been updated but is not yet refreshed, the get API will have to parse -and analyze the source to extract the stored fields. In order to disable -realtime GET, the `realtime` parameter can be set to `false`. - -[discrete] -[[get-source-filtering]] -===== Source filtering - -By default, the get operation returns the contents of the `_source` field unless -you have used the `stored_fields` parameter or if the `_source` field is disabled. -You can turn off `_source` retrieval by using the `_source` parameter: - -[source,console] --------------------------------------------------- -GET my-index-000001/_doc/0?_source=false --------------------------------------------------- -// TEST[setup:my_index] - -If you only need one or two fields from the `_source`, use the `_source_includes` -or `_source_excludes` parameters to include or filter out particular fields. -This can be especially helpful with large documents where partial retrieval can -save on network overhead. Both parameters take a comma separated list -of fields or wildcard expressions. Example: - -[source,console] --------------------------------------------------- -GET my-index-000001/_doc/0?_source_includes=*.id&_source_excludes=entities --------------------------------------------------- -// TEST[setup:my_index] - -If you only want to specify includes, you can use a shorter notation: - -[source,console] --------------------------------------------------- -GET my-index-000001/_doc/0?_source=*.id --------------------------------------------------- -// TEST[setup:my_index] - -[discrete] -[[get-routing]] -===== Routing - -If routing is used during indexing, the routing value also needs to be -specified to retrieve a document. For example: - -[source,console] --------------------------------------------------- -GET my-index-000001/_doc/2?routing=user1 --------------------------------------------------- -// TEST[continued] - -This request gets the document with id `2`, but it is routed based on the -user. The document is not fetched if the correct routing is not specified. - -[discrete] -[[preference]] -===== Preference - -Controls a `preference` of which shard replicas to execute the get -request on. By default, the operation is randomized between the shard -replicas. - -The `preference` can be set to: - -`_local`:: - The operation will prefer to be executed on a local - allocated shard if possible. - -Custom (string) value:: - A custom value will be used to guarantee that - the same shards will be used for the same custom value. This can help - with "jumping values" when hitting different shards in different refresh - states. A sample value can be something like the web session id, or the - user name. - -[discrete] -[[get-refresh]] -===== Refresh - -The `refresh` parameter can be set to `true` in order to refresh the -relevant shard before the get operation and make it searchable. Setting -it to `true` should be done after careful thought and verification that -this does not cause a heavy load on the system (and slows down -indexing). - -[discrete] -[[get-distributed]] -===== Distributed - -The get operation gets hashed into a specific shard id. It then gets -redirected to one of the replicas within that shard id and returns the -result. 
The replicas are the primary shard and its replicas within that -shard id group. This means that the more replicas we have, the -better GET scaling we will have. - -[discrete] -[[get-versioning]] -===== Versioning support - -You can use the `version` parameter to retrieve the document only if -its current version is equal to the specified one. - -Internally, Elasticsearch has marked the old document as deleted and added an -entirely new document. The old version of the document doesn’t disappear -immediately, although you won’t be able to access it. Elasticsearch cleans up -deleted documents in the background as you continue to index more data. - -[[docs-get-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) Name of the index that contains the document. - -`<_id>`:: -(Required, string) Unique identifier of the document. - -[[docs-get-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=preference] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=realtime] - -`refresh`:: -(Optional, Boolean) If `true`, the request refreshes the relevant shard before -retrieving the document. Defaults to `false`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=routing] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=stored_fields] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=source] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=source_excludes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=source_includes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=doc-version] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=version_type] - -[[docs-get-api-response-body]] -==== {api-response-body-title} - -`_index`:: -The name of the index the document belongs to. - -`_id`:: -The unique identifier for the document. - -`_version`:: -The document version. Incremented each time the document is updated. - -`_seq_no`:: -The sequence number assigned to the document for the indexing -operation. Sequence numbers are used to ensure an older version of a document -doesn’t overwrite a newer version. See <>. - -`_primary_term`:: -The primary term assigned to the document for the indexing operation. -See <>. - -`found`:: -Indicates whether the document exists: `true` or `false`. - -`_routing`:: -The explicit routing, if set. - -'_source':: -If `found` is `true`, contains the document data formatted in JSON. -Excluded if the `_source` parameter is set to `false` or the `stored_fields` -parameter is set to `true`. - -'_fields':: -If the `stored_fields` parameter is set to `true` and `found` is -`true`, contains the document fields stored in the index. 
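As a point of reference for the `found` field above, requesting a document ID that does not exist (the ID below is made up) returns HTTP `404 - Not Found` with a minimal body along these lines:

[source,js]
--------------------------------------------------
{
  "_index": "my-index-000001",
  "_id": "does-not-exist",
  "found": false
}
--------------------------------------------------
// NOTCONSOLE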
- -[[docs-get-api-example]] -==== {api-examples-title} - -Retrieve the JSON document with the `_id` 0 from the `my-index-000001` index: - -[source,console] --------------------------------------------------- -GET my-index-000001/_doc/0 --------------------------------------------------- -// TEST[setup:my_index] - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "_index": "my-index-000001", - "_id": "0", - "_version": 1, - "_seq_no": 0, - "_primary_term": 1, - "found": true, - "_source": { - "@timestamp": "2099-11-15T14:12:12", - "http": { - "request": { - "method": "get" - }, - "response": { - "status_code": 200, - "bytes": 1070000 - }, - "version": "1.1" - }, - "source": { - "ip": "127.0.0.1" - }, - "message": "GET /search HTTP/1.1 200 1070000", - "user": { - "id": "kimchy" - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"_seq_no": \d+/"_seq_no": $body._seq_no/ s/"_primary_term": 1/"_primary_term": $body._primary_term/] - -Check to see if a document with the `_id` 0 exists: - -[source,console] --------------------------------------------------- -HEAD my-index-000001/_doc/0 --------------------------------------------------- -// TEST[setup:my_index] - -{es} returns a status code of `200 - OK` if the document exists, or -`404 - Not Found` if it doesn't. - -[discrete] -[[_source]] -===== Get the source field only - -Use the `/_source/` resource to get -just the `_source` field of a document. For example: - -[source,console] --------------------------------------------------- -GET my-index-000001/_source/1 --------------------------------------------------- -// TEST[continued] - -You can use the source filtering parameters to control which parts of the -`_source` are returned: - -[source,console] --------------------------------------------------- -GET my-index-000001/_source/1/?_source_includes=*.id&_source_excludes=entities --------------------------------------------------- -// TEST[continued] - -You can use HEAD with the `_source` endpoint to efficiently -test whether or not the document _source exists. A document's source is not -available if it is disabled in the <>. - -[source,console] --------------------------------------------------- -HEAD my-index-000001/_source/1 --------------------------------------------------- -// TEST[continued] - -[discrete] -[[get-stored-fields]] -===== Get stored fields - -Use the `stored_fields` parameter to specify the set of stored fields you want -to retrieve. Any requested fields that are not stored are ignored. 
-Consider for instance the following mapping: - -[source,console] --------------------------------------------------- -PUT my-index-000001 -{ - "mappings": { - "properties": { - "counter": { - "type": "integer", - "store": false - }, - "tags": { - "type": "keyword", - "store": true - } - } - } -} --------------------------------------------------- - -Now we can add a document: - -[source,console] --------------------------------------------------- -PUT my-index-000001/_doc/1 -{ - "counter": 1, - "tags": [ "production" ] -} --------------------------------------------------- -// TEST[continued] - -And then try to retrieve it: - -[source,console] --------------------------------------------------- -GET my-index-000001/_doc/1?stored_fields=tags,counter --------------------------------------------------- -// TEST[continued] - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "_index": "my-index-000001", - "_id": "1", - "_version": 1, - "_seq_no" : 22, - "_primary_term" : 1, - "found": true, - "fields": { - "tags": [ - "production" - ] - } -} --------------------------------------------------- -// TESTRESPONSE[s/"_seq_no" : \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/] - -Field values fetched from the document itself are always returned as an array. -Since the `counter` field is not stored, the get request ignores it. - -You can also retrieve metadata fields like the `_routing` field: - -[source,console] --------------------------------------------------- -PUT my-index-000001/_doc/2?routing=user1 -{ - "counter" : 1, - "tags" : ["env2"] -} --------------------------------------------------- -// TEST[continued] - -[source,console] --------------------------------------------------- -GET my-index-000001/_doc/2?routing=user1&stored_fields=tags,counter --------------------------------------------------- -// TEST[continued] - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "_index": "my-index-000001", - "_id": "2", - "_version": 1, - "_seq_no" : 13, - "_primary_term" : 1, - "_routing": "user1", - "found": true, - "fields": { - "tags": [ - "env2" - ] - } -} --------------------------------------------------- -// TESTRESPONSE[s/"_seq_no" : \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/] - -Only leaf fields can be retrieved with the `stored_field` option. Object fields -can't be returned--if specified, the request fails. diff --git a/docs/reference/docs/index_.asciidoc b/docs/reference/docs/index_.asciidoc deleted file mode 100644 index ccc8e67f39bc0..0000000000000 --- a/docs/reference/docs/index_.asciidoc +++ /dev/null @@ -1,603 +0,0 @@ -[[docs-index_]] -=== Index API -++++ -Index -++++ - -IMPORTANT: See <>. - -Adds a JSON document to the specified data stream or index and makes -it searchable. If the target is an index and the document already exists, -the request updates the document and increments its version. - -NOTE: You cannot use the index API to send update requests for existing -documents to a data stream. See <> -and <>. - -[[docs-index-api-request]] -==== {api-request-title} - -`PUT //_doc/<_id>` - -`POST //_doc/` - -`PUT //_create/<_id>` - -`POST //_create/<_id>` - -IMPORTANT: You cannot add new documents to a data stream using the -`PUT //_doc/<_id>` request format. To specify a document ID, use the -`PUT //_create/<_id>` format instead. See -<>. 
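As a minimal sketch of the `_create` format against a data stream: `my-data-stream` below is a hypothetical target, and, as with any write to a data stream, the document must contain an `@timestamp` field.

[source,console]
--------------------------------------------------
PUT my-data-stream/_create/1
{
  "@timestamp": "2099-11-15T13:12:00",
  "message": "GET /search HTTP/1.1 200 1070000"
}
--------------------------------------------------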
- -[[docs-index-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the following -<> for the target data stream, index, -or index alias: - -** To add or overwrite a document using the `PUT //_doc/<_id>` request -format, you must have the `create`, `index`, or `write` index privilege. - -** To add a document using the `POST //_doc/`, -`PUT //_create/<_id>`, or `POST //_create/<_id>` request -formats, you must have the `create_doc`, `create`, `index`, or `write` index -privilege. - -** To automatically create a data stream or index with an index API request, you -must have the `auto_configure`, `create_index`, or `manage` index privilege. - -* Automatic data stream creation requires a matching index template with data -stream enabled. See <>. - -[[docs-index-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) Name of the data stream or index to target. -+ -If the target doesn't exist and matches the name or wildcard (`*`) pattern of an -<>, this request creates the data stream. See -<>. -+ -If the target doesn't exist and doesn't match a data stream template, -this request creates the index. -+ -You can check for existing targets using the resolve index API. - -`<_id>`:: -(Optional, string) Unique identifier for the document. -+ --- -This parameter is required for the following request formats: - -* `PUT //_doc/<_id>` -* `PUT //_create/<_id>` -* `POST //_create/<_id>` - -To automatically generate a document ID, use the `POST //_doc/` request -format and omit this parameter. --- - - - -[[docs-index-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=if_seq_no] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=if_primary_term] - -[[docs-index-api-op_type]] -`op_type`:: -(Optional, enum) Set to `create` to only index the document -if it does not already exist (_put if absent_). If a document with the specified -`_id` already exists, the indexing operation will fail. Same as using the -`/_create` endpoint. Valid values: `index`, `create`. -If document id is specified, it defaults to `index`. Otherwise, it defaults to `create`. -+ -NOTE: If the request targets a data stream, an `op_type` of `create` is -required. See <>. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=pipeline] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=refresh] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=routing] - -`timeout`:: -+ --- -(Optional, <>) -Period the request waits for the following operations: - -* <> -* <> updates -* <> - -Defaults to `1m` (one minute). This guarantees {es} waits for at least the -timeout before failing. The actual wait time could be longer, particularly when -multiple waits occur. --- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=doc-version] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=version_type] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=wait_for_active_shards] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=require-alias] - -[[docs-index-api-request-body]] -==== {api-request-body-title} - -``:: -(Required, string) Request body contains the JSON source for the document -data. - -[[docs-index-api-response-body]] -==== {api-response-body-title} - -`_shards`:: -Provides information about the replication process of the index operation. 
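As mentioned above, you can check for existing targets using the resolve index API; a quick sketch with a hypothetical wildcard pattern:

[source,console]
--------------------------------------------------
GET /_resolve/index/my-index-*
--------------------------------------------------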
- -`_shards.total`:: -Indicates how many shard copies (primary and replica shards) the index operation -should be executed on. - -`_shards.successful`:: -Indicates the number of shard copies the index operation succeeded on. -When the index operation is successful, `successful` is at least 1. -+ -NOTE: Replica shards might not all be started when an indexing operation -returns successfully--by default, only the primary is required. Set -`wait_for_active_shards` to change this default behavior. See -<>. - -`_shards.failed`:: -An array that contains replication-related errors in the case an index operation -failed on a replica shard. 0 indicates there were no failures. - -`_index`:: -The name of the index the document was added to. - -`_type`:: -The document type. {es} indices now support a single document type, `_doc`. - -`_id`:: -The unique identifier for the added document. - -`_version`:: -The document version. Incremented each time the document is updated. - -`_seq_no`:: -The sequence number assigned to the document for the indexing operation. -Sequence numbers are used to ensure an older version of a document -doesn’t overwrite a newer version. See <>. - -`_primary_term`:: -The primary term assigned to the document for the indexing operation. -See <>. - -`result`:: -The result of the indexing operation, `created` or `updated`. - -[[docs-index-api-desc]] -==== {api-description-title} - -You can index a new JSON document with the `_doc` or `_create` resource. Using -`_create` guarantees that the document is only indexed if it does not already -exist. To update an existing document, you must use the `_doc` resource. - -[[index-creation]] -===== Automatically create data streams and indices - -If request's target doesn't exist and matches an -<>, the index operation automatically creates the data stream. See -<>. - -If the target doesn't exist and doesn't match a data stream template, -the operation automatically creates the index and applies any matching -<>. - -NOTE: {es} includes several built-in index templates. To avoid naming collisions -with these templates, see <>. - -If no mapping exists, the index operation -creates a dynamic mapping. By default, new fields and objects are -automatically added to the mapping if needed. For more information about field -mapping, see <> and the <> API. - -Automatic index creation is controlled by the <> -setting. This setting defaults to `true`, which allows any index to be created -automatically. You can modify this setting to explicitly allow or block -automatic creation of indices that match specified patterns, or set it to -`false` to disable automatic index creation entirely. Specify a -comma-separated list of patterns you want to allow, or prefix each pattern with -`+` or `-` to indicate whether it should be allowed or blocked. When a list is -specified, the default behaviour is to disallow. - -IMPORTANT: The `action.auto_create_index` setting only affects the automatic -creation of indices. It does not affect the creation of data streams. 
- -[source,console] --------------------------------------------------- -PUT _cluster/settings -{ - "persistent": { - "action.auto_create_index": "my-index-000001,index10,-index1*,+ind*" <1> - } -} - -PUT _cluster/settings -{ - "persistent": { - "action.auto_create_index": "false" <2> - } -} - -PUT _cluster/settings -{ - "persistent": { - "action.auto_create_index": "true" <3> - } -} --------------------------------------------------- - -<1> Allow auto-creation of indices called `my-index-000001` or `index10`, block the -creation of indices that match the pattern `index1*`, and allow creation of -any other indices that match the `ind*` pattern. Patterns are matched in -the order specified. - -<2> Disable automatic index creation entirely. - -<3> Allow automatic creation of any index. This is the default. - -[discrete] -[[operation-type]] -===== Put if absent - -You can force a create operation by using the `_create` resource or -setting the `op_type` parameter to _create_. In this case, -the index operation fails if a document with the specified ID -already exists in the index. - -[discrete] -[[create-document-ids-automatically]] -===== Create document IDs automatically - -When using the `POST //_doc/` request format, the `op_type` is -automatically set to `create` and the index operation generates a unique ID for -the document. - -[source,console] --------------------------------------------------- -POST my-index-000001/_doc/ -{ - "@timestamp": "2099-11-15T13:12:00", - "message": "GET /search HTTP/1.1 200 1070000", - "user": { - "id": "kimchy" - } -} --------------------------------------------------- - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "_shards": { - "total": 2, - "failed": 0, - "successful": 2 - }, - "_index": "my-index-000001", - "_id": "W0tpsmIBdwcYyG50zbta", - "_version": 1, - "_seq_no": 0, - "_primary_term": 1, - "result": "created" -} --------------------------------------------------- -// TESTRESPONSE[s/W0tpsmIBdwcYyG50zbta/$body._id/ s/"successful": 2/"successful": 1/] - -[discrete] -[[optimistic-concurrency-control-index]] -===== Optimistic concurrency control - -Index operations can be made conditional and only be performed if the last -modification to the document was assigned the sequence number and primary -term specified by the `if_seq_no` and `if_primary_term` parameters. If a -mismatch is detected, the operation will result in a `VersionConflictException` -and a status code of 409. See <> for more details. - -[discrete] -[[index-routing]] -===== Routing - -By default, shard placement -- or `routing` -- is controlled by using a -hash of the document's id value. For more explicit control, the value -fed into the hash function used by the router can be directly specified -on a per-operation basis using the `routing` parameter. For example: - -[source,console] --------------------------------------------------- -POST my-index-000001/_doc?routing=kimchy -{ - "@timestamp": "2099-11-15T13:12:00", - "message": "GET /search HTTP/1.1 200 1070000", - "user": { - "id": "kimchy" - } -} --------------------------------------------------- - -In this example, the document is routed to a shard based on -the `routing` parameter provided: "kimchy". - -When setting up explicit mapping, you can also use the `_routing` field -to direct the index operation to extract the routing value from the -document itself. This does come at the (very minimal) cost of an -additional document parsing pass. 
If the `_routing` mapping is defined -and set to be `required`, the index operation will fail if no routing -value is provided or extracted. - -NOTE: Data streams do not support custom routing unless they were created with -the <> setting -enabled in the template. - -[discrete] -[[index-distributed]] -===== Distributed - -The index operation is directed to the primary shard based on its route -(see the Routing section above) and performed on the actual node -containing this shard. After the primary shard completes the operation, -if needed, the update is distributed to applicable replicas. - -[discrete] -[[index-wait-for-active-shards]] -===== Active shards - -To improve the resiliency of writes to the system, indexing operations -can be configured to wait for a certain number of active shard copies -before proceeding with the operation. If the requisite number of active -shard copies are not available, then the write operation must wait and -retry, until either the requisite shard copies have started or a timeout -occurs. By default, write operations only wait for the primary shards -to be active before proceeding (i.e. `wait_for_active_shards=1`). -This default can be overridden in the index settings dynamically -by setting `index.write.wait_for_active_shards`. To alter this behavior -per operation, the `wait_for_active_shards` request parameter can be used. - -Valid values are `all` or any positive integer up to the total number -of configured copies per shard in the index (which is `number_of_replicas+1`). -Specifying a negative value or a number greater than the number of -shard copies will throw an error. - -For example, suppose we have a cluster of three nodes, `A`, `B`, and `C` and -we create an index `index` with the number of replicas set to 3 (resulting in -4 shard copies, one more copy than there are nodes). If we -attempt an indexing operation, by default the operation will only ensure -the primary copy of each shard is available before proceeding. This means -that even if `B` and `C` went down, and `A` hosted the primary shard copies, -the indexing operation would still proceed with only one copy of the data. -If `wait_for_active_shards` is set on the request to `3` (and all 3 nodes -are up), then the indexing operation will require 3 active shard copies -before proceeding, a requirement which should be met because there are 3 -active nodes in the cluster, each one holding a copy of the shard. However, -if we set `wait_for_active_shards` to `all` (or to `4`, which is the same), -the indexing operation will not proceed as we do not have all 4 copies of -each shard active in the index. The operation will timeout -unless a new node is brought up in the cluster to host the fourth copy of -the shard. - -It is important to note that this setting greatly reduces the chances of -the write operation not writing to the requisite number of shard copies, -but it does not completely eliminate the possibility, because this check -occurs before the write operation commences. Once the write operation -is underway, it is still possible for replication to fail on any number of -shard copies but still succeed on the primary. The `_shards` section of the -write operation's response reveals the number of shard copies on which -replication succeeded/failed. 
- -[source,js] --------------------------------------------------- -{ - "_shards": { - "total": 2, - "failed": 0, - "successful": 2 - } -} --------------------------------------------------- -// NOTCONSOLE - -[discrete] -[[index-refresh]] -===== Refresh - -Control when the changes made by this request are visible to search. See -<>. - -[discrete] -[[index-noop]] -===== Noop updates - -When updating a document using the index API a new version of the document is -always created even if the document hasn't changed. If this isn't acceptable -use the `_update` API with `detect_noop` set to true. This option isn't -available on the index API because the index API doesn't fetch the old source -and isn't able to compare it against the new source. - -There isn't a hard and fast rule about when noop updates aren't acceptable. -It's a combination of lots of factors like how frequently your data source -sends updates that are actually noops and how many queries per second -Elasticsearch runs on the shard receiving the updates. - -[discrete] -[[timeout]] -===== Timeout - -The primary shard assigned to perform the index operation might not be -available when the index operation is executed. Some reasons for this -might be that the primary shard is currently recovering from a gateway -or undergoing relocation. By default, the index operation will wait on -the primary shard to become available for up to 1 minute before failing -and responding with an error. The `timeout` parameter can be used to -explicitly specify how long it waits. Here is an example of setting it -to 5 minutes: - -[source,console] --------------------------------------------------- -PUT my-index-000001/_doc/1?timeout=5m -{ - "@timestamp": "2099-11-15T13:12:00", - "message": "GET /search HTTP/1.1 200 1070000", - "user": { - "id": "kimchy" - } -} --------------------------------------------------- - -[discrete] -[[index-versioning]] -===== Versioning - -Each indexed document is given a version number. By default, -internal versioning is used that starts at 1 and increments -with each update, deletes included. Optionally, the version number can be -set to an external value (for example, if maintained in a -database). To enable this functionality, `version_type` should be set to -`external`. The value provided must be a numeric, long value greater than or equal to 0, -and less than around 9.2e+18. - -When using the external version type, the system checks to see if -the version number passed to the index request is greater than the -version of the currently stored document. If true, the document will be -indexed and the new version number used. If the value provided is less -than or equal to the stored document's version number, a version -conflict will occur and the index operation will fail. For example: - -[source,console] --------------------------------------------------- -PUT my-index-000001/_doc/1?version=2&version_type=external -{ - "user": { - "id": "elkbee" - } -} --------------------------------------------------- -// TEST[continued] - -NOTE: Versioning is completely real time, and is not affected by the -near real time aspects of search operations. If no version is provided, -then the operation is executed without any version checks. - -In the previous example, the operation will succeed since the supplied -version of 2 is higher than -the current document version of 1. If the document was already updated -and its version was set to 2 or higher, the indexing command will fail -and result in a conflict (409 http status code). 
- -A nice side effect is that there is no need to maintain strict ordering -of async indexing operations executed as a result of changes to a source -database, as long as version numbers from the source database are used. -Even the simple case of updating the Elasticsearch index using data from -a database is simplified if external versioning is used, as only the -latest version will be used if the index operations arrive out of order for -whatever reason. - -[discrete] -[[index-version-types]] -===== Version types - -In addition to the `external` version type, Elasticsearch -also supports other types for specific use cases: - -[[_version_types]] -`external` or `external_gt`:: Only index the document if the given version is strictly higher -than the version of the stored document *or* if there is no existing document. The given -version will be used as the new version and will be stored with the new document. The supplied -version must be a non-negative long number. - -`external_gte`:: Only index the document if the given version is *equal* or higher -than the version of the stored document. If there is no existing document -the operation will succeed as well. The given version will be used as the new version -and will be stored with the new document. The supplied version must be a non-negative long number. - -NOTE: The `external_gte` version type is meant for special use cases and -should be used with care. If used incorrectly, it can result in loss of data. -There is another option, `force`, which is deprecated because it can cause -primary and replica shards to diverge. - -[[docs-index-api-example]] -==== {api-examples-title} - -Insert a JSON document into the `my-index-000001` index with an `_id` of 1: - -[source,console] --------------------------------------------------- -PUT my-index-000001/_doc/1 -{ - "@timestamp": "2099-11-15T13:12:00", - "message": "GET /search HTTP/1.1 200 1070000", - "user": { - "id": "kimchy" - } -} --------------------------------------------------- - -The API returns the following result: - -[source,console-result] --------------------------------------------------- -{ - "_shards": { - "total": 2, - "failed": 0, - "successful": 2 - }, - "_index": "my-index-000001", - "_id": "1", - "_version": 1, - "_seq_no": 0, - "_primary_term": 1, - "result": "created" -} --------------------------------------------------- -// TESTRESPONSE[s/"successful": 2/"successful": 1/] - -Use the `_create` resource to index a document into the `my-index-000001` index if -no document with that ID exists: - -[source,console] --------------------------------------------------- -PUT my-index-000001/_create/1 -{ - "@timestamp": "2099-11-15T13:12:00", - "message": "GET /search HTTP/1.1 200 1070000", - "user": { - "id": "kimchy" - } -} --------------------------------------------------- - -Set the `op_type` parameter to _create_ to index a document into the `my-index-000001` -index if no document with that ID exists: - -[source,console] --------------------------------------------------- -PUT my-index-000001/_doc/1?op_type=create -{ - "@timestamp": "2099-11-15T13:12:00", - "message": "GET /search HTTP/1.1 200 1070000", - "user": { - "id": "kimchy" - } -} --------------------------------------------------- diff --git a/docs/reference/docs/multi-get.asciidoc b/docs/reference/docs/multi-get.asciidoc deleted file mode 100644 index 293bd2568a34b..0000000000000 --- a/docs/reference/docs/multi-get.asciidoc +++ /dev/null @@ -1,283 +0,0 @@ -[[docs-multi-get]] -=== Multi get (mget) API -++++ -Multi 
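To make the conflict case concrete: repeating the request above after it has succeeded (so the stored version is already `2`) is rejected, because external versioning only accepts a supplied version that is strictly greater than the stored one, and {es} responds with HTTP `409`:

[source,console]
--------------------------------------------------
PUT my-index-000001/_doc/1?version=2&version_type=external
{
  "user": {
    "id": "elkbee"
  }
}
--------------------------------------------------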
get -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-document[Document APIs]. --- - -Retrieves multiple JSON documents by ID. - -[source,console] --------------------------------------------------- -GET /_mget -{ - "docs": [ - { - "_index": "my-index-000001", - "_id": "1" - }, - { - "_index": "my-index-000001", - "_id": "2" - } - ] -} --------------------------------------------------- -// TEST[setup:my_index] - -[[docs-multi-get-api-request]] -==== {api-request-title} - -`GET /_mget` - -`GET //_mget` - -[[docs-multi-get-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `read` -<> for the target index or index alias. - -[[docs-multi-get-api-desc]] -==== {api-description-title} - -You use `mget` to retrieve multiple documents from one or more indices. -If you specify an index in the request URI, you only need to specify the document IDs in the request body. - -[[mget-security]] -===== Security - -See <>. - -[[multi-get-partial-responses]] -===== Partial responses - -To ensure fast responses, the multi get API responds with partial results if one or more shards fail. -See <> for more information. - -[[docs-multi-get-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Name of the index to retrieve documents from when `ids` are specified, -or when a document in the `docs` array does not specify an index. - -[[docs-multi-get-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=preference] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=realtime] - -`refresh`:: -(Optional, Boolean) If `true`, the request refreshes relevant shards before -retrieving documents. Defaults to `false`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=routing] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=stored_fields] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=source] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=source_excludes] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=source_includes] - -[[docs-multi-get-api-request-body]] -==== {api-request-body-title} - -`docs`:: -(Optional, array) The documents you want to retrieve. -Required if no index is specified in the request URI. -You can specify the following attributes for each -document: -+ --- -`_id`:: -(Required, string) The unique document ID. - -`_index`:: -(Optional, string) -The index that contains the document. -Required if no index is specified in the request URI. - -`routing`:: -(Optional, string) The key for the primary shard the document resides on. -Required if routing is used during indexing. - -`_source`:: -(Optional, Boolean) If `false`, excludes all `_source` fields. Defaults to `true`. -`source_include`::: -(Optional, array) The fields to extract and return from the `_source` field. -`source_exclude`::: -(Optional, array) The fields to exclude from the returned `_source` field. - -`_stored_fields`:: -(Optional, array) The stored fields you want to retrieve. --- - -`ids`:: -(Optional, array) The IDs of the documents you want to retrieve. -Allowed when the index is specified in the request URI. - -[[multi-get-api-response-body]] -==== {api-response-body-title} - -The response includes a `docs` array that contains the documents in the order specified in the request. -The structure of the returned documents is similar to that returned by the <> API. 
-If there is a failure getting a particular document, the error is included in place of the document. - -[[docs-multi-get-api-example]] -==== {api-examples-title} - -[[mget-ids]] -===== Get documents by ID - -If you specify an index in the request URI, only the document IDs are required in the request body: - -[source,console] --------------------------------------------------- -GET /my-index-000001/_mget -{ - "docs": [ - { - "_id": "1" - }, - { - "_id": "2" - } - ] -} --------------------------------------------------- -// TEST[setup:my_index] - -You can use the `ids` element to simplify the request: - -[source,console] --------------------------------------------------- -GET /my-index-000001/_mget -{ - "ids" : ["1", "2"] -} --------------------------------------------------- -// TEST[setup:my_index] - -[[mget-source-filtering]] -===== Filter source fields - -By default, the `_source` field is returned for every document (if stored). -Use the `_source` and `_source_include` or `source_exclude` attributes to -filter what fields are returned for a particular document. -You can include the `_source`, `_source_includes`, and `_source_excludes` query parameters in the -request URI to specify the defaults to use when there are no per-document instructions. - -For example, the following request sets `_source` to false for document 1 to exclude the -source entirely, retrieves `field3` and `field4` from document 2, and retrieves the `user` field -from document 3 but filters out the `user.location` field. - -[source,console] --------------------------------------------------- -GET /_mget -{ - "docs": [ - { - "_index": "test", - "_id": "1", - "_source": false - }, - { - "_index": "test", - "_id": "2", - "_source": [ "field3", "field4" ] - }, - { - "_index": "test", - "_id": "3", - "_source": { - "include": [ "user" ], - "exclude": [ "user.location" ] - } - } - ] -} --------------------------------------------------- - -[[mget-fields]] -===== Get stored fields - -Use the `stored_fields` attribute to specify the set of stored fields you want -to retrieve. Any requested fields that are not stored are ignored. -You can include the `stored_fields` query parameter in the request URI to specify the defaults -to use when there are no per-document instructions. - -For example, the following request retrieves `field1` and `field2` from document 1, and -`field3` and `field4` from document 2: - -[source,console] --------------------------------------------------- -GET /_mget -{ - "docs": [ - { - "_index": "test", - "_id": "1", - "stored_fields": [ "field1", "field2" ] - }, - { - "_index": "test", - "_id": "2", - "stored_fields": [ "field3", "field4" ] - } - ] -} --------------------------------------------------- - -The following request retrieves `field1` and `field2` from all documents by default. -These default fields are returned for document 1, but -overridden to return `field3` and `field4` for document 2. - -[source,console] --------------------------------------------------- -GET /test/_mget?stored_fields=field1,field2 -{ - "docs": [ - { - "_id": "1" - }, - { - "_id": "2", - "stored_fields": [ "field3", "field4" ] - } - ] -} --------------------------------------------------- - -[[mget-routing]] -===== Specify document routing - -If routing is used during indexing, you need to specify the routing value to retrieve documents. 
-For example, the following request fetches `test/_doc/2` from the shard corresponding to routing key `key1`, -and fetches `test/_doc/1` from the shard corresponding to routing key `key2`. - -[source,console] --------------------------------------------------- -GET /_mget?routing=key1 -{ - "docs": [ - { - "_index": "test", - "_id": "1", - "routing": "key2" - }, - { - "_index": "test", - "_id": "2" - } - ] -} --------------------------------------------------- diff --git a/docs/reference/docs/multi-termvectors.asciidoc b/docs/reference/docs/multi-termvectors.asciidoc deleted file mode 100644 index 5a27e0b9b3a37..0000000000000 --- a/docs/reference/docs/multi-termvectors.asciidoc +++ /dev/null @@ -1,160 +0,0 @@ -[[docs-multi-termvectors]] -=== Multi term vectors API -++++ -Multi term vectors -++++ - -Retrieves multiple term vectors with a single request. - -[source,console] --------------------------------------------------- -POST /_mtermvectors -{ - "docs": [ - { - "_index": "my-index-000001", - "_id": "2", - "term_statistics": true - }, - { - "_index": "my-index-000001", - "_id": "1", - "fields": [ - "message" - ] - } - ] -} --------------------------------------------------- -// TEST[setup:my_index] - -[[docs-multi-termvectors-api-request]] -==== {api-request-title} - -`POST /_mtermvectors` - -`POST //_mtermvectors` - -[[docs-multi-termvectors-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `read` -<> for the target index or index alias. - -[[docs-multi-termvectors-api-desc]] -==== {api-description-title} - -You can specify existing documents by index and ID or -provide artificial documents in the body of the request. -You can specify the index in the request body or request URI. - -The response contains a `docs` array with all the fetched termvectors. -Each element has the structure provided by the <> -API. - -See the <> API for more information about the information -that can be included in the response. - -[[docs-multi-termvectors-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Name of the index that contains the documents. 
- -[[docs-multi-termvectors-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=fields] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=field_statistics] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=offsets] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=payloads] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=positions] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=preference] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=routing] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=realtime] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=term_statistics] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=version] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=version_type] - -[discrete] -[[docs-multi-termvectors-api-example]] -==== {api-examples-title} - -If you specify an index in the request URI, the index does not need to be specified for each documents -in the request body: - -[source,console] --------------------------------------------------- -POST /my-index-000001/_mtermvectors -{ - "docs": [ - { - "_id": "2", - "fields": [ - "message" - ], - "term_statistics": true - }, - { - "_id": "1" - } - ] -} --------------------------------------------------- -// TEST[setup:my_index] - -If all requested documents are in same index and the parameters are the same, you can use the -following simplified syntax: - -[source,console] --------------------------------------------------- -POST /my-index-000001/_mtermvectors -{ - "ids": [ "1", "2" ], - "parameters": { - "fields": [ - "message" - ], - "term_statistics": true - } -} --------------------------------------------------- -// TEST[setup:my_index] - -[[docs-multi-termvectors-artificial-doc]] -===== Artificial documents - -You can also use `mtermvectors` to generate term vectors for _artificial_ documents provided -in the body of the request. The mapping used is determined by the specified `_index`. - -[source,console] --------------------------------------------------- -POST /_mtermvectors -{ - "docs": [ - { - "_index": "my-index-000001", - "doc" : { - "message" : "test test test" - } - }, - { - "_index": "my-index-000001", - "doc" : { - "message" : "Another test ..." - } - } - ] -} --------------------------------------------------- -// TEST[setup:my_index] diff --git a/docs/reference/docs/refresh.asciidoc b/docs/reference/docs/refresh.asciidoc deleted file mode 100644 index 2bbac2c0b1ccd..0000000000000 --- a/docs/reference/docs/refresh.asciidoc +++ /dev/null @@ -1,111 +0,0 @@ -[[docs-refresh]] -=== `?refresh` - -The <>, <>, <>, and -<> APIs support setting `refresh` to control when changes made -by this request are made visible to search. These are the allowed values: - -Empty string or `true`:: - -Refresh the relevant primary and replica shards (not the whole index) -immediately after the operation occurs, so that the updated document appears -in search results immediately. This should *ONLY* be done after careful thought -and verification that it does not lead to poor performance, both from an -indexing and a search standpoint. - -`wait_for`:: - -Wait for the changes made by the request to be made visible by a refresh before -replying. This doesn't force an immediate refresh, rather, it waits for a -refresh to happen. Elasticsearch automatically refreshes shards that have changed -every `index.refresh_interval` which defaults to one second. That setting is -<>. 
Calling the <> API or -setting `refresh` to `true` on any of the APIs that support it will also -cause a refresh, in turn causing already running requests with `refresh=wait_for` -to return. - -`false` (the default):: - -Take no refresh related actions. The changes made by this request will be made -visible at some point after the request returns. - -[discrete] -==== Choosing which setting to use -// tag::refresh-default[] -Unless you have a good reason to wait for the change to become visible, always -use `refresh=false` (the default setting). The simplest and fastest choice is to omit the `refresh` parameter from the URL. - -If you absolutely must have the changes made by a request visible synchronously -with the request, you must choose between putting more load on -Elasticsearch (`true`) and waiting longer for the response (`wait_for`). -// end::refresh-default[] -Here are a few points that should inform that decision: - -* The more changes being made to the index the more work `wait_for` saves -compared to `true`. In the case that the index is only changed once every -`index.refresh_interval` then it saves no work. -* `true` creates less efficient indexes constructs (tiny segments) that must -later be merged into more efficient index constructs (larger segments). Meaning -that the cost of `true` is paid at index time to create the tiny segment, at -search time to search the tiny segment, and at merge time to make the larger -segments. -* Never start multiple `refresh=wait_for` requests in a row. Instead batch them -into a single bulk request with `refresh=wait_for` and Elasticsearch will start -them all in parallel and return only when they have all finished. -* If the refresh interval is set to `-1`, disabling the automatic refreshes, -then requests with `refresh=wait_for` will wait indefinitely until some action -causes a refresh. Conversely, setting `index.refresh_interval` to something -shorter than the default like `200ms` will make `refresh=wait_for` come back -faster, but it'll still generate inefficient segments. -* `refresh=wait_for` only affects the request that it is on, but, by forcing a -refresh immediately, `refresh=true` will affect other ongoing request. In -general, if you have a running system you don't wish to disturb then -`refresh=wait_for` is a smaller modification. - -[discrete] -[[refresh_wait_for-force-refresh]] -==== `refresh=wait_for` Can Force a Refresh - -If a `refresh=wait_for` request comes in when there are already -`index.max_refresh_listeners` (defaults to 1000) requests waiting for a refresh -on that shard then that request will behave just as though it had `refresh` set -to `true` instead: it will force a refresh. This keeps the promise that when a -`refresh=wait_for` request returns that its changes are visible for search -while preventing unchecked resource usage for blocked requests. If a request -forced a refresh because it ran out of listener slots then its response will -contain `"forced_refresh": true`. - -Bulk requests only take up one slot on each shard that they touch no matter how -many times they modify the shard. 
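As a sketch of the batching advice above, several writes can share a single `refresh=wait_for` by going through the bulk API (the `test` index and document IDs are illustrative):

[source,console]
--------------------------------------------------
POST /test/_bulk?refresh=wait_for
{ "index": { "_id": "1" } }
{ "test": "test" }
{ "index": { "_id": "2" } }
{ "test": "test" }
--------------------------------------------------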
- -[discrete] -==== Examples - -These will create a document and immediately refresh the index so it is visible: - -[source,console] --------------------------------------------------- -PUT /test/_doc/1?refresh -{"test": "test"} -PUT /test/_doc/2?refresh=true -{"test": "test"} --------------------------------------------------- - -These will create a document without doing anything to make it visible for -search: - -[source,console] --------------------------------------------------- -PUT /test/_doc/3 -{"test": "test"} -PUT /test/_doc/4?refresh=false -{"test": "test"} --------------------------------------------------- - -This will create a document and wait for it to become visible for search: - -[source,console] --------------------------------------------------- -PUT /test/_doc/4?refresh=wait_for -{"test": "test"} --------------------------------------------------- diff --git a/docs/reference/docs/reindex.asciidoc b/docs/reference/docs/reindex.asciidoc deleted file mode 100644 index 455410ad943a0..0000000000000 --- a/docs/reference/docs/reindex.asciidoc +++ /dev/null @@ -1,1135 +0,0 @@ -[[docs-reindex]] -=== Reindex API -++++ -Reindex -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-document[Document APIs]. --- - -Copies documents from a source to a destination. - -The source can be any existing index, alias, or data stream. The destination -must differ from the source. For example, you cannot reindex a data stream into -itself. - -[IMPORTANT] -================================================= -Reindex requires <> to be enabled for -all documents in the source. - -The destination should be configured as wanted before calling `_reindex`. -Reindex does not copy the settings from the source or its associated template. - -Mappings, shard counts, replicas, and so on must be configured ahead of time. -================================================= - -[source,console] --------------------------------------------------- -POST _reindex -{ - "source": { - "index": "my-index-000001" - }, - "dest": { - "index": "my-new-index-000001" - } -} --------------------------------------------------- -// TEST[setup:my_index_big] - -//// - -[source,console-result] --------------------------------------------------- -{ - "took" : 147, - "timed_out": false, - "created": 120, - "updated": 0, - "deleted": 0, - "batches": 1, - "version_conflicts": 0, - "noops": 0, - "retries": { - "bulk": 0, - "search": 0 - }, - "throttled_millis": 0, - "requests_per_second": -1.0, - "throttled_until_millis": 0, - "total": 120, - "failures" : [ ] -} --------------------------------------------------- -// TESTRESPONSE[s/"took" : 147/"took" : "$body.took"/] - -//// - -[[docs-reindex-api-request]] -==== {api-request-title} - -`POST /_reindex` - -[[docs-reindex-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the following -security privileges: - -** The `read` <> for the source data -stream, index, or alias. - -** The `write` index privilege for the destination data stream, index, or index -alias. - -** To automatically create a data stream or index with an reindex API request, -you must have the `auto_configure`, `create_index`, or `manage` index -privilege for the destination data stream, index, or alias. - -** If reindexing from a remote cluster, the `source.remote.user` must have the -`monitor` <> and the `read` index -privilege for the source data stream, index, or alias. 
- -* If reindexing from a remote cluster, you must explicitly allow the remote host -in the `reindex.remote.whitelist` setting of `elasticsearch.yml`. See -<>. - -* Automatic data stream creation requires a matching index template with data -stream enabled. See <>. - -[[docs-reindex-api-desc]] -==== {api-description-title} - -// tag::docs-reindex-api-desc-tag[] -Extracts the <> from the source index and indexes the documents into the destination index. -You can copy all documents to the destination index, or reindex a subset of the documents. -// end::docs-reindex-api-desc-tag[] - - -Just like <>, `_reindex` gets a -snapshot of the source but its destination must be **different** so -version conflicts are unlikely. The `dest` element can be configured like the -index API to control optimistic concurrency control. Omitting -`version_type` or setting it to `internal` causes Elasticsearch -to blindly dump documents into the destination, overwriting any that happen to have -the same ID. - -Setting `version_type` to `external` causes Elasticsearch to preserve the -`version` from the source, create any documents that are missing, and update -any documents that have an older version in the destination than they do -in the source. - -Setting `op_type` to `create` causes `_reindex` to only create missing -documents in the destination. All existing documents will cause a version -conflict. - -IMPORTANT: Because data streams are <>, -any reindex request to a destination data stream must have an `op_type` -of `create`. A reindex can only add new documents to a destination data stream. -It cannot update existing documents in a destination data stream. - -By default, version conflicts abort the `_reindex` process. -To continue reindexing if there are conflicts, set the `"conflicts"` request body parameter to `proceed`. -In this case, the response includes a count of the version conflicts that were encountered. -Note that the handling of other error types is unaffected by the `"conflicts"` parameter. -Additionally, if you opt to count version conflicts the operation could attempt to reindex more documents -from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target, or it has gone -through every document in the source query. - -[[docs-reindex-task-api]] -===== Running reindex asynchronously - -If the request contains `wait_for_completion=false`, {es} -performs some preflight checks, launches the request, and returns a -<> you can use to cancel or get the status of the task. -{es} creates a record of this task as a document at `_tasks/`. - -[[docs-reindex-from-multiple-sources]] -===== Reindex from multiple sources -If you have many sources to reindex it is generally better to reindex them -one at a time rather than using a glob pattern to pick up multiple sources. That -way you can resume the process if there are any errors by removing the -partially completed source and starting over. It also makes -parallelizing the process fairly simple: split the list of sources to reindex -and run each list in parallel. 
- -One-off bash scripts seem to work nicely for this: - -[source,bash] ----------------------------------------------------------------- -for index in i1 i2 i3 i4 i5; do - curl -HContent-Type:application/json -XPOST localhost:9200/_reindex?pretty -d'{ - "source": { - "index": "'$index'" - }, - "dest": { - "index": "'$index'-reindexed" - } - }' -done ----------------------------------------------------------------- -// NOTCONSOLE - -[[docs-reindex-throttle]] -===== Throttling - -Set `requests_per_second` to any positive decimal number (`1.4`, `6`, -`1000`, etc.) to throttle the rate at which `_reindex` issues batches of index -operations. Requests are throttled by padding each batch with a wait time. -To disable throttling, set `requests_per_second` to `-1`. - -The throttling is done by waiting between batches so that the `scroll` that `_reindex` -uses internally can be given a timeout that takes into account the padding. -The padding time is the difference between the batch size divided by the -`requests_per_second` and the time spent writing. By default the batch size is -`1000`, so if `requests_per_second` is set to `500`: - -[source,txt] --------------------------------------------------- -target_time = 1000 / 500 per second = 2 seconds -wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds --------------------------------------------------- - -Since the batch is issued as a single `_bulk` request, large batch sizes -cause Elasticsearch to create many requests and then wait for a while before -starting the next set. This is "bursty" instead of "smooth". - -[[docs-reindex-rethrottle]] -===== Rethrottling - -The value of `requests_per_second` can be changed on a running reindex using -the `_rethrottle` API: - -[source,console] --------------------------------------------------- -POST _reindex/r1A2WoRbTwKZ516z6NEs5A:36619/_rethrottle?requests_per_second=-1 --------------------------------------------------- - -The task ID can be found using the <>. - -Just like when setting it on the Reindex API, `requests_per_second` -can be either `-1` to disable throttling or any decimal number -like `1.7` or `12` to throttle to that level. Rethrottling that speeds up the -query takes effect immediately, but rethrottling that slows down the query will -take effect after completing the current batch. This prevents scroll -timeouts. - -[[docs-reindex-slice]] -===== Slicing - -Reindex supports <> to parallelize the reindexing process. -This parallelization can improve efficiency and provide a convenient way to -break the request down into smaller parts. - -// tag::remote-reindex-slicing[] -NOTE: Reindexing from remote clusters does not support -<> or <>. 
-// end::remote-reindex-slicing[] - -[[docs-reindex-manual-slice]] -====== Manual slicing -Slice a reindex request manually by providing a slice id and total number of -slices to each request: - -[source,console] ----------------------------------------------------------------- -POST _reindex -{ - "source": { - "index": "my-index-000001", - "slice": { - "id": 0, - "max": 2 - } - }, - "dest": { - "index": "my-new-index-000001" - } -} -POST _reindex -{ - "source": { - "index": "my-index-000001", - "slice": { - "id": 1, - "max": 2 - } - }, - "dest": { - "index": "my-new-index-000001" - } -} ----------------------------------------------------------------- -// TEST[setup:my_index_big] - -You can verify this works by: - -[source,console] ----------------------------------------------------------------- -GET _refresh -POST my-new-index-000001/_search?size=0&filter_path=hits.total ----------------------------------------------------------------- -// TEST[continued] - -which results in a sensible `total` like this one: - -[source,console-result] ----------------------------------------------------------------- -{ - "hits": { - "total" : { - "value": 120, - "relation": "eq" - } - } -} ----------------------------------------------------------------- - -[[docs-reindex-automatic-slice]] -====== Automatic slicing - -You can also let `_reindex` automatically parallelize using <> to -slice on `_id`. Use `slices` to specify the number of slices to use: - -[source,console] ----------------------------------------------------------------- -POST _reindex?slices=5&refresh -{ - "source": { - "index": "my-index-000001" - }, - "dest": { - "index": "my-new-index-000001" - } -} ----------------------------------------------------------------- -// TEST[setup:my_index_big] - -You can also verify this works by: - -[source,console] ----------------------------------------------------------------- -POST my-new-index-000001/_search?size=0&filter_path=hits.total ----------------------------------------------------------------- -// TEST[continued] - -which results in a sensible `total` like this one: - -[source,console-result] ----------------------------------------------------------------- -{ - "hits": { - "total" : { - "value": 120, - "relation": "eq" - } - } -} ----------------------------------------------------------------- - -Setting `slices` to `auto` will let Elasticsearch choose the number of slices to -use. This setting will use one slice per shard, up to a certain limit. If there -are multiple sources, it will choose the number of -slices based on the index or <> with the smallest -number of shards. - -Adding `slices` to `_reindex` just automates the manual process used in the -section above, creating sub-requests which means it has some quirks: - -* You can see these requests in the <>. These -sub-requests are "child" tasks of the task for the request with `slices`. -* Fetching the status of the task for the request with `slices` only contains -the status of completed slices. -* These sub-requests are individually addressable for things like cancellation -and rethrottling. -* Rethrottling the request with `slices` will rethrottle the unfinished -sub-request proportionally. -* Canceling the request with `slices` will cancel each sub-request. -* Due to the nature of `slices` each sub-request won't get a perfectly even -portion of the documents. All documents will be addressed, but some slices may -be larger than others. Expect larger slices to have a more even distribution. 
-* Parameters like `requests_per_second` and `max_docs` on a request with -`slices` are distributed proportionally to each sub-request. Combine that with -the point above about distribution being uneven and you should conclude that -using `max_docs` with `slices` might not result in exactly `max_docs` documents -being reindexed. -* Each sub-request gets a slightly different snapshot of the source, -though these are all taken at approximately the same time. - -[[docs-reindex-picking-slices]] -====== Picking the number of slices - -If slicing automatically, setting `slices` to `auto` will choose a reasonable -number for most indices. If slicing manually or otherwise tuning -automatic slicing, use these guidelines. - -Query performance is most efficient when the number of `slices` is equal to the -number of shards in the index. If that number is large (e.g. 500), -choose a lower number as too many `slices` will hurt performance. Setting -`slices` higher than the number of shards generally does not improve efficiency -and adds overhead. - -Indexing performance scales linearly across available resources with the -number of slices. - -Whether query or indexing performance dominates the runtime depends on the -documents being reindexed and cluster resources. - -[[docs-reindex-routing]] -===== Reindex routing - -By default if `_reindex` sees a document with routing then the routing is -preserved unless it's changed by the script. You can set `routing` on the -`dest` request to change this: - -`keep`:: - -Sets the routing on the bulk request sent for each match to the routing on -the match. This is the default value. - -`discard`:: - -Sets the routing on the bulk request sent for each match to `null`. - -`=`:: - -Sets the routing on the bulk request sent for each match to all text after -the `=`. - -For example, you can use the following request to copy all documents from -the `source` with the company name `cat` into the `dest` with -routing set to `cat`. - -[source,console] --------------------------------------------------- -POST _reindex -{ - "source": { - "index": "source", - "query": { - "match": { - "company": "cat" - } - } - }, - "dest": { - "index": "dest", - "routing": "=cat" - } -} --------------------------------------------------- -// TEST[s/^/PUT source\n/] - - - -By default `_reindex` uses scroll batches of 1000. You can change the -batch size with the `size` field in the `source` element: - -[source,console] --------------------------------------------------- -POST _reindex -{ - "source": { - "index": "source", - "size": 100 - }, - "dest": { - "index": "dest", - "routing": "=cat" - } -} --------------------------------------------------- -// TEST[s/^/PUT source\n/] - -[[reindex-with-an-ingest-pipeline]] -===== Reindex with an ingest pipeline - -Reindex can also use the <> feature by specifying a -`pipeline` like this: - -[source,console] --------------------------------------------------- -POST _reindex -{ - "source": { - "index": "source" - }, - "dest": { - "index": "dest", - "pipeline": "some_ingest_pipeline" - } -} --------------------------------------------------- -// TEST[s/^/PUT source\n/] - -[[docs-reindex-api-query-params]] -==== {api-query-parms-title} - -`refresh`:: -(Optional, Boolean) If `true`, the request refreshes affected shards to make -this operation visible to search. Defaults to `false`. - -`timeout`:: -+ --- -(Optional, <>) -Period each indexing waits for the following operations: - -* <> -* <> updates -* <> - -Defaults to `1m` (one minute). 
This guarantees {es} waits for at least the -timeout before failing. The actual wait time could be longer, particularly when -multiple waits occur. --- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=wait_for_active_shards] - -`wait_for_completion`:: -(Optional, Boolean) If `true`, the request blocks until the operation is complete. -Defaults to `true`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=requests_per_second] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=require-alias] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=scroll] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=slices] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=max_docs] - -[[docs-reindex-api-request-body]] -==== {api-request-body-title} - -[[conflicts]] -`conflicts`:: -(Optional, enum) Set to `proceed` to continue reindexing even if there are conflicts. -Defaults to `abort`. - -`max_docs`:: -(Optional, integer) The maximum number of documents to reindex. If <> is equal to -`proceed`, reindex could attempt to reindex more documents from the source than `max_docs` until it has successfully -indexed `max_docs` documents into the target, or it has gone through every document in the source query. - -`source`:: -`index`::: -(Required, string) The name of the data stream, index, or alias you are copying -_from_. Also accepts a comma-separated list to reindex from multiple sources. - -`query`::: -(Optional, <>) Specifies the documents to reindex using the Query DSL. - -`remote`::: -`host`:::: -(Optional, string) The URL for the remote instance of {es} that you want to index _from_. -Required when indexing from remote. -`username`:::: -(Optional, string) The username to use for authentication with the remote host. -`password`:::: -(Optional, string) The password to use for authentication with the remote host. -`socket_timeout`:::: -(Optional, <>) The remote socket read timeout. Defaults to 30 seconds. -`connect_timeout`:::: -(Optional, <>) The remote connection timeout. Defaults to 30 seconds. -`headers`:::: -(Optional, object) An object containing the headers of ther request. -`size`::: -{Optional, integer) The number of documents to index per batch. -Use when indexing from remote to ensure that the batches fit within the on-heap buffer, -which defaults to a maximum size of 100 MB. - -`slice`::: -`id`:::: -(Optional, integer) Slice ID for <>. -`max`:::: -(Optional, integer) Total number of slices. - -`sort`::: -+ --- -(Optional, list) A comma-separated list of `:` pairs to sort by before indexing. -Use in conjunction with `max_docs` to control what documents are reindexed. - -deprecated::[7.6, Sort in reindex is deprecated. Sorting in reindex was never guaranteed to index documents in order and prevents further development of reindex such as resilience and performance improvements. If used in combination with `max_docs`, consider using a query filter instead.] --- - -`_source`::: -(Optional, string) If `true` reindexes all source fields. -Set to a list to reindex select fields. -Defaults to `true`. - -`dest`:: -`index`::: -(Required, string) The name of the data stream, index, or index alias you are copying _to_. - -`version_type`::: -(Optional, enum) The versioning to use for the indexing operation. -Valid values: `internal`, `external`, `external_gt`, `external_gte`. -See <> for more information. - -`op_type`::: -(Optional, enum) Set to create to only index documents that do not already exist (put if absent). -Valid values: `index`, `create`. 
Defaults to `index`. -+ -IMPORTANT: To reindex to a data stream destination, this argument must be -`create`. - -`pipeline`::: -(Optional, string) the name of the <> to use. - -`script`:: -`source`::: -(Optional, string) The script to run to update the document source or metadata when reindexing. -`lang`::: -(Optional, enum) The script language: `painless`, `expression`, `mustache`, `java`. -For more information, see <>. - - -[[docs-reindex-api-response-body]] -==== {api-response-body-title} - -`took`:: - -(integer) The total milliseconds the entire operation took. - -`timed_out`:: - -{Boolean) This flag is set to `true` if any of the requests executed during the -reindex timed out. - -`total`:: - -(integer) The number of documents that were successfully processed. - -`updated`:: -(integer) The number of documents that were successfully updated, -i.e. a document with same ID already existed prior to reindex updating it. - -`created`:: - -(integer) The number of documents that were successfully created. - -`deleted`:: - -(integer) The number of documents that were successfully deleted. - -`batches`:: - -(integer) The number of scroll responses pulled back by the reindex. - -`noops`:: - -(integer) The number of documents that were ignored because the script used for -the reindex returned a `noop` value for `ctx.op`. - -`version_conflicts`:: - -(integer) The number of version conflicts that reindex hits. - -`retries`:: - -(integer) The number of retries attempted by reindex. `bulk` is the number of bulk -actions retried and `search` is the number of search actions retried. - -`throttled_millis`:: - -(integer) Number of milliseconds the request slept to conform to `requests_per_second`. - -`requests_per_second`:: - -(integer) The number of requests per second effectively executed during the reindex. - -`throttled_until_millis`:: - -(integer) This field should always be equal to zero in a `_reindex` response. It only -has meaning when using the <>, where it -indicates the next time (in milliseconds since epoch) a throttled request will be -executed again in order to conform to `requests_per_second`. - -`failures`:: - -(array) Array of failures if there were any unrecoverable errors during the process. If -this is non-empty then the request aborted because of those failures. Reindex -is implemented using batches and any failure causes the entire process to abort -but all failures in the current batch are collected into the array. You can use -the `conflicts` option to prevent reindex from aborting on version conflicts. - -[[docs-reindex-api-example]] -==== {api-examples-title} - -[[docs-reindex-select-query]] -===== Reindex select documents with a query - -You can limit the documents by adding a query to the `source`. -For example, the following request only copies documents with a `user.id` of `kimchy` into `my-new-index-000001`: - -[source,console] --------------------------------------------------- -POST _reindex -{ - "source": { - "index": "my-index-000001", - "query": { - "term": { - "user.id": "kimchy" - } - } - }, - "dest": { - "index": "my-new-index-000001" - } -} --------------------------------------------------- -// TEST[setup:my_index] - -[[docs-reindex-select-max-docs]] -===== Reindex select documents with `max_docs` - -You can limit the number of processed documents by setting `max_docs`. 
-For example, this request copies a single document from `my-index-000001` to -`my-new-index-000001`: - -[source,console] --------------------------------------------------- -POST _reindex -{ - "max_docs": 1, - "source": { - "index": "my-index-000001" - }, - "dest": { - "index": "my-new-index-000001" - } -} --------------------------------------------------- -// TEST[setup:my_index] - -[[docs-reindex-multiple-sources]] -===== Reindex from multiple sources - -The `index` attribute in `source` can be a list, allowing you to copy from lots -of sources in one request. This will copy documents from the -`my-index-000001` and `my-index-000002` indices: - -[source,console] --------------------------------------------------- -POST _reindex -{ - "source": { - "index": ["my-index-000001", "my-index-000002"] - }, - "dest": { - "index": "my-new-index-000002" - } -} --------------------------------------------------- -// TEST[setup:my_index] -// TEST[s/^/PUT my-index-000002\/_doc\/post1?refresh\n{"test": "foo"}\n/] - -NOTE: The Reindex API makes no effort to handle ID collisions so the last -document written will "win" but the order isn't usually predictable so it is -not a good idea to rely on this behavior. Instead, make sure that IDs are unique -using a script. - -[[docs-reindex-filter-source]] -===== Reindex select fields with a source filter - -You can use source filtering to reindex a subset of the fields in the original documents. -For example, the following request only reindexes the `user.id` and `_doc` fields of each document: - -[source,console] --------------------------------------------------- -POST _reindex -{ - "source": { - "index": "my-index-000001", - "_source": ["user.id", "_doc"] - }, - "dest": { - "index": "my-new-index-000001" - } -} --------------------------------------------------- -// TEST[setup:my_index] - -[[docs-reindex-change-name]] -===== Reindex to change the name of a field - -`_reindex` can be used to build a copy of an index with renamed fields. Say you -create an index containing documents that look like this: - -[source,console] --------------------------------------------------- -POST my-index-000001/_doc/1?refresh -{ - "text": "words words", - "flag": "foo" -} --------------------------------------------------- - -but you don't like the name `flag` and want to replace it with `tag`. 
-`_reindex` can create the other index for you: - -[source,console] --------------------------------------------------- -POST _reindex -{ - "source": { - "index": "my-index-000001" - }, - "dest": { - "index": "my-new-index-000001" - }, - "script": { - "source": "ctx._source.tag = ctx._source.remove(\"flag\")" - } -} --------------------------------------------------- -// TEST[continued] - -Now you can get the new document: - -[source,console] --------------------------------------------------- -GET my-new-index-000001/_doc/1 --------------------------------------------------- -// TEST[continued] - -which will return: - -[source,console-result] --------------------------------------------------- -{ - "found": true, - "_id": "1", - "_index": "my-new-index-000001", - "_version": 1, - "_seq_no": 44, - "_primary_term": 1, - "_source": { - "text": "words words", - "tag": "foo" - } -} --------------------------------------------------- -// TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term": 1/"_primary_term" : $body._primary_term/] - -[[docs-reindex-daily-indices]] -===== Reindex daily indices - -You can use `_reindex` in combination with <> to reindex -daily indices to apply a new template to the existing documents. - -Assuming you have indices that contain documents like: - -[source,console] ----------------------------------------------------------------- -PUT metricbeat-2016.05.30/_doc/1?refresh -{"system.cpu.idle.pct": 0.908} -PUT metricbeat-2016.05.31/_doc/1?refresh -{"system.cpu.idle.pct": 0.105} ----------------------------------------------------------------- - -The new template for the `metricbeat-*` indices is already loaded into Elasticsearch, -but it applies only to the newly created indices. Painless can be used to reindex -the existing documents and apply the new template. - -The script below extracts the date from the index name and creates a new index -with `-1` appended. All data from `metricbeat-2016.05.31` will be reindexed -into `metricbeat-2016.05.31-1`. - -[source,console] ----------------------------------------------------------------- -POST _reindex -{ - "source": { - "index": "metricbeat-*" - }, - "dest": { - "index": "metricbeat" - }, - "script": { - "lang": "painless", - "source": "ctx._index = 'metricbeat-' + (ctx._index.substring('metricbeat-'.length(), ctx._index.length())) + '-1'" - } -} ----------------------------------------------------------------- -// TEST[continued] - -All documents from the previous metricbeat indices can now be found in the `*-1` indices. - -[source,console] ----------------------------------------------------------------- -GET metricbeat-2016.05.30-1/_doc/1 -GET metricbeat-2016.05.31-1/_doc/1 ----------------------------------------------------------------- -// TEST[continued] - -The previous method can also be used in conjunction with <> -to load only the existing data into the new index and rename any fields if needed. 
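
For example, the two scripts can be merged into a single Painless script that rewrites the
index name and renames a field in the same pass. This is only a sketch: the `flag` and `tag`
field names are borrowed from the rename example above and assume such a field exists in the
source documents.

[source,console]
--------------------------------------------------
POST _reindex
{
  "source": {
    "index": "metricbeat-*"
  },
  "dest": {
    "index": "metricbeat"
  },
  "script": {
    "lang": "painless",
    "source": "ctx._index = 'metricbeat-' + (ctx._index.substring('metricbeat-'.length(), ctx._index.length())) + '-1'; if (ctx._source.containsKey('flag')) { ctx._source.tag = ctx._source.remove('flag') }"
  }
}
--------------------------------------------------
// TEST[skip:illustrative example]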
- -[[docs-reindex-api-subset]] -===== Extract a random subset of the source - -`_reindex` can be used to extract a random subset of the source for testing: - -[source,console] ----------------------------------------------------------------- -POST _reindex -{ - "max_docs": 10, - "source": { - "index": "my-index-000001", - "query": { - "function_score" : { - "random_score" : {}, - "min_score" : 0.9 <1> - } - } - }, - "dest": { - "index": "my-new-index-000001" - } -} ----------------------------------------------------------------- -// TEST[setup:my_index_big] - -<1> You may need to adjust the `min_score` depending on the relative amount of -data extracted from source. - -[[reindex-scripts]] -===== Modify documents during reindexing - -Like `_update_by_query`, `_reindex` supports a script that modifies the -document. Unlike `_update_by_query`, the script is allowed to modify the -document's metadata. This example bumps the version of the source document: - -[source,console] --------------------------------------------------- -POST _reindex -{ - "source": { - "index": "my-index-000001" - }, - "dest": { - "index": "my-new-index-000001", - "version_type": "external" - }, - "script": { - "source": "if (ctx._source.foo == 'bar') {ctx._version++; ctx._source.remove('foo')}", - "lang": "painless" - } -} --------------------------------------------------- -// TEST[setup:my_index] - -Just as in `_update_by_query`, you can set `ctx.op` to change the -operation that is executed on the destination: - -`noop`:: - -Set `ctx.op = "noop"` if your script decides that the document doesn't have -to be indexed in the destination. This no operation will be reported -in the `noop` counter in the <>. - -`delete`:: - -Set `ctx.op = "delete"` if your script decides that the document must be - deleted from the destination. The deletion will be reported in the - `deleted` counter in the <>. - -Setting `ctx.op` to anything else will return an error, as will setting any -other field in `ctx`. - -Think of the possibilities! Just be careful; you are able to -change: - - * `_id` - * `_index` - * `_version` - * `_routing` - -Setting `_version` to `null` or clearing it from the `ctx` map is just like not -sending the version in an indexing request; it will cause the document to be -overwritten in the destination regardless of the version on the target or the -version type you use in the `_reindex` request. - -[[reindex-from-remote]] -==== Reindex from remote - -Reindex supports reindexing from a remote Elasticsearch cluster: - -[source,console] --------------------------------------------------- -POST _reindex -{ - "source": { - "remote": { - "host": "http://otherhost:9200", - "username": "user", - "password": "pass" - }, - "index": "my-index-000001", - "query": { - "match": { - "test": "data" - } - } - }, - "dest": { - "index": "my-new-index-000001" - } -} --------------------------------------------------- -// TEST[setup:host] -// TEST[s/^/PUT my-index-000001\n/] -// TEST[s/otherhost:9200",/\${host}",/] -// TEST[s/"username": "user",/"username": "test_admin",/] -// TEST[s/"password": "pass"/"password": "x-pack-test-password"/] - -The `host` parameter must contain a scheme, host, port (e.g. -`https://otherhost:9200`), and optional path (e.g. `https://otherhost:9200/proxy`). -The `username` and `password` parameters are optional, and when they are present `_reindex` -will connect to the remote Elasticsearch node using basic auth. Be sure to use `https` when -using basic auth or the password will be sent in plain text. 
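
For example, the request above can target a TLS-protected remote simply by using an `https`
scheme in `host`. This is a sketch; `otherhost` and the credentials are the same placeholders
used in the example above:

[source,console]
--------------------------------------------------
POST _reindex
{
  "source": {
    "remote": {
      "host": "https://otherhost:9200",
      "username": "user",
      "password": "pass"
    },
    "index": "my-index-000001"
  },
  "dest": {
    "index": "my-new-index-000001"
  }
}
--------------------------------------------------
// TEST[skip:illustrative example]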
-There are a range of <> available to configure the behaviour of the - `https` connection. - -When using {ecloud}, it is also possible to authenticate against the remote cluster -through the use of a valid API key: - -[source,console] ----- -POST _reindex -{ - "source": { - "remote": { - "host": "http://otherhost:9200", - "headers": { - "Authorization": "ApiKey API_KEY_VALUE" - } - }, - "index": "my-index-000001", - "query": { - "match": { - "test": "data" - } - } - }, - "dest": { - "index": "my-new-index-000001" - } -} ----- -// TEST[setup:host] -// TEST[s/^/PUT my-index-000001\n/] -// TEST[s/otherhost:9200",/\${host}",/] -// TEST[s/"headers": \{[^}]*\}/"username": "test_admin", "password": "x-pack-test-password"/] - -Remote hosts have to be explicitly allowed in `elasticsearch.yml` using the -`reindex.remote.whitelist` property. It can be set to a comma delimited list -of allowed remote `host` and `port` combinations. Scheme is -ignored, only the host and port are used. For example: - - -[source,yaml] --------------------------------------------------- -reindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*"] --------------------------------------------------- - -The list of allowed hosts must be configured on any nodes that will coordinate the reindex. - -This feature should work with remote clusters of any version of Elasticsearch -you are likely to find. This should allow you to upgrade from any version of -Elasticsearch to the current version by reindexing from a cluster of the old -version. - -WARNING: {es} does not support forward compatibility across major versions. For -example, you cannot reindex from a 7.x cluster into a 6.x cluster. - -To enable queries sent to older versions of Elasticsearch the `query` parameter -is sent directly to the remote host without validation or modification. - -include::{es-ref-dir}/docs/reindex.asciidoc[tag=remote-reindex-slicing] - -Reindexing from a remote server uses an on-heap buffer that defaults to a -maximum size of 100mb. If the remote index includes very large documents you'll -need to use a smaller batch size. The example below sets the batch size to `10` -which is very, very small. - -[source,console] --------------------------------------------------- -POST _reindex -{ - "source": { - "remote": { - "host": "http://otherhost:9200", - ... - }, - "index": "source", - "size": 10, - "query": { - "match": { - "test": "data" - } - } - }, - "dest": { - "index": "dest" - } -} --------------------------------------------------- -// TEST[setup:host] -// TEST[s/^/PUT source\n/] -// TEST[s/otherhost:9200/\${host}/] -// TEST[s/\.\.\./"username": "test_admin", "password": "x-pack-test-password"/] - -It is also possible to set the socket read timeout on the remote connection -with the `socket_timeout` field and the connection timeout with the -`connect_timeout` field. Both default to 30 seconds. 
This example -sets the socket read timeout to one minute and the connection timeout to 10 -seconds: - -[source,console] --------------------------------------------------- -POST _reindex -{ - "source": { - "remote": { - "host": "http://otherhost:9200", - ..., - "socket_timeout": "1m", - "connect_timeout": "10s" - }, - "index": "source", - "query": { - "match": { - "test": "data" - } - } - }, - "dest": { - "index": "dest" - } -} --------------------------------------------------- -// TEST[setup:host] -// TEST[s/^/PUT source\n/] -// TEST[s/otherhost:9200/\${host}/] -// TEST[s/\.\.\.,/"username": "test_admin", "password": "x-pack-test-password",/] - -[[reindex-ssl]] -===== Configuring SSL parameters - -Reindex from remote supports configurable SSL settings. These must be -specified in the `elasticsearch.yml` file, with the exception of the -secure settings, which you add in the Elasticsearch keystore. -It is not possible to configure SSL in the body of the `_reindex` request. -Refer to <>. diff --git a/docs/reference/docs/termvectors.asciidoc b/docs/reference/docs/termvectors.asciidoc deleted file mode 100644 index d40452fb4875a..0000000000000 --- a/docs/reference/docs/termvectors.asciidoc +++ /dev/null @@ -1,481 +0,0 @@ -[[docs-termvectors]] -=== Term vectors API -++++ -Term vectors -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-document[Document APIs]. --- - -Retrieves information and statistics for terms in the fields of a particular document. - -[source,console] --------------------------------------------------- -GET /my-index-000001/_termvectors/1 --------------------------------------------------- -// TEST[setup:my_index] - -[[docs-termvectors-api-request]] -==== {api-request-title} - -`GET //_termvectors/<_id>` - -[[docs-termvectors-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `read` -<> for the target index or index alias. - -[[docs-termvectors-api-desc]] -==== {api-description-title} - -You can retrieve term vectors for documents stored in the index or -for _artificial_ documents passed in the body of the request. - -You can specify the fields you are interested in through the `fields` parameter, -or by adding the fields to the request body. - -[source,console] --------------------------------------------------- -GET /my-index-000001/_termvectors/1?fields=message --------------------------------------------------- -// TEST[setup:my_index] - -Fields can be specified using wildcards, similar to the <>. - -Term vectors are <> by default, not near real-time. -This can be changed by setting `realtime` parameter to `false`. - -You can request three types of values: _term information_, _term statistics_ -and _field statistics_. By default, all term information and field -statistics are returned for all fields but term statistics are excluded. - -[[docs-termvectors-api-term-info]] -===== Term information - - * term frequency in the field (always returned) - * term positions (`positions` : true) - * start and end offsets (`offsets` : true) - * term payloads (`payloads` : true), as base64 encoded bytes - -If the requested information wasn't stored in the index, it will be -computed on the fly if possible. Additionally, term vectors could be computed -for documents not even existing in the index, but instead provided by the user. - -[WARNING] -====== -Start and end offsets assume UTF-16 encoding is being used. 
If you want to use -these offsets in order to get the original text that produced this token, you -should make sure that the string you are taking a sub-string of is also encoded -using UTF-16. -====== - -[[docs-termvectors-api-term-stats]] -===== Term statistics - -Setting `term_statistics` to `true` (default is `false`) will -return - - * total term frequency (how often a term occurs in all documents) + - * document frequency (the number of documents containing the current - term) - -By default these values are not returned since term statistics can -have a serious performance impact. - -[[docs-termvectors-api-field-stats]] -===== Field statistics - -Setting `field_statistics` to `false` (default is `true`) will -omit : - - * document count (how many documents contain this field) - * sum of document frequencies (the sum of document frequencies for all - terms in this field) - * sum of total term frequencies (the sum of total term frequencies of - each term in this field) - -[[docs-termvectors-api-terms-filtering]] -===== Terms filtering - -With the parameter `filter`, the terms returned could also be filtered based -on their tf-idf scores. This could be useful in order find out a good -characteristic vector of a document. This feature works in a similar manner to -the <> of the -<>. See <> -for usage. - -The following sub-parameters are supported: - -[horizontal] -`max_num_terms`:: - Maximum number of terms that must be returned per field. Defaults to `25`. -`min_term_freq`:: - Ignore words with less than this frequency in the source doc. Defaults to `1`. -`max_term_freq`:: - Ignore words with more than this frequency in the source doc. Defaults to unbounded. -`min_doc_freq`:: - Ignore terms which do not occur in at least this many docs. Defaults to `1`. -`max_doc_freq`:: - Ignore words which occur in more than this many docs. Defaults to unbounded. -`min_word_length`:: - The minimum word length below which words will be ignored. Defaults to `0`. -`max_word_length`:: - The maximum word length above which words will be ignored. Defaults to unbounded (`0`). - -[[docs-termvectors-api-behavior]] -==== Behaviour - -The term and field statistics are not accurate. Deleted documents -are not taken into account. The information is only retrieved for the -shard the requested document resides in. -The term and field statistics are therefore only useful as relative measures -whereas the absolute numbers have no meaning in this context. By default, -when requesting term vectors of artificial documents, a shard to get the statistics -from is randomly selected. Use `routing` only to hit a particular shard. - -[[docs-termvectors-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) Name of the index that contains the document. - -`<_id>`:: -(Optional, string) Unique identifier of the document. 
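
To illustrate the behaviour described above, the sketch below requests filtered term vectors
for an artificial document while pinning the statistics to the shard selected by a `routing`
value. The index, field, and routing values reuse the examples on this page and are
illustrative only:

[source,console]
--------------------------------------------------
GET /my-index-000001/_termvectors?routing=kimchy
{
  "doc": {
    "text": "test test test"
  },
  "term_statistics": true,
  "filter": {
    "max_num_terms": 3,
    "min_doc_freq": 1
  }
}
--------------------------------------------------
// TEST[skip:illustrative example]

Without the `routing` value, the statistics would come from a randomly selected shard, as
described above.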
- -[[docs-termvectors-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=fields] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=field_statistics] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=offsets] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=payloads] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=positions] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=preference] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=routing] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=realtime] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=term_statistics] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=version] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=version_type] - -[[docs-termvectors-api-example]] -==== {api-examples-title} - -[[docs-termvectors-api-stored-termvectors]] -===== Returning stored term vectors - -First, we create an index that stores term vectors, payloads etc. : - -[source,console] --------------------------------------------------- -PUT /my-index-000001 -{ "mappings": { - "properties": { - "text": { - "type": "text", - "term_vector": "with_positions_offsets_payloads", - "store" : true, - "analyzer" : "fulltext_analyzer" - }, - "fullname": { - "type": "text", - "term_vector": "with_positions_offsets_payloads", - "analyzer" : "fulltext_analyzer" - } - } - }, - "settings" : { - "index" : { - "number_of_shards" : 1, - "number_of_replicas" : 0 - }, - "analysis": { - "analyzer": { - "fulltext_analyzer": { - "type": "custom", - "tokenizer": "whitespace", - "filter": [ - "lowercase", - "type_as_payload" - ] - } - } - } - } -} --------------------------------------------------- - -Second, we add some documents: - -[source,console] --------------------------------------------------- -PUT /my-index-000001/_doc/1 -{ - "fullname" : "John Doe", - "text" : "test test test " -} - -PUT /my-index-000001/_doc/2?refresh=wait_for -{ - "fullname" : "Jane Doe", - "text" : "Another test ..." 
-} --------------------------------------------------- -// TEST[continued] - -The following request returns all information and statistics for field -`text` in document `1` (John Doe): - -[source,console] --------------------------------------------------- -GET /my-index-000001/_termvectors/1 -{ - "fields" : ["text"], - "offsets" : true, - "payloads" : true, - "positions" : true, - "term_statistics" : true, - "field_statistics" : true -} --------------------------------------------------- -// TEST[continued] - -Response: - -[source,console-result] --------------------------------------------------- -{ - "_index": "my-index-000001", - "_id": "1", - "_version": 1, - "found": true, - "took": 6, - "term_vectors": { - "text": { - "field_statistics": { - "sum_doc_freq": 4, - "doc_count": 2, - "sum_ttf": 6 - }, - "terms": { - "test": { - "doc_freq": 2, - "ttf": 4, - "term_freq": 3, - "tokens": [ - { - "position": 0, - "start_offset": 0, - "end_offset": 4, - "payload": "d29yZA==" - }, - { - "position": 1, - "start_offset": 5, - "end_offset": 9, - "payload": "d29yZA==" - }, - { - "position": 2, - "start_offset": 10, - "end_offset": 14, - "payload": "d29yZA==" - } - ] - } - } - } - } -} --------------------------------------------------- -// TEST[continued] -// TESTRESPONSE[s/"took": 6/"took": "$body.took"/] - -[[docs-termvectors-api-generate-termvectors]] -===== Generating term vectors on the fly - -Term vectors which are not explicitly stored in the index are automatically -computed on the fly. The following request returns all information and statistics for the -fields in document `1`, even though the terms haven't been explicitly stored in the index. -Note that for the field `text`, the terms are not re-generated. - -[source,console] --------------------------------------------------- -GET /my-index-000001/_termvectors/1 -{ - "fields" : ["text", "some_field_without_term_vectors"], - "offsets" : true, - "positions" : true, - "term_statistics" : true, - "field_statistics" : true -} --------------------------------------------------- -// TEST[continued] - -[[docs-termvectors-artificial-doc]] -===== Artificial documents - -Term vectors can also be generated for artificial documents, -that is for documents not present in the index. For example, the following request would -return the same results as in example 1. The mapping used is determined by the `index`. - -*If dynamic mapping is turned on (default), the document fields not in the original -mapping will be dynamically created.* - -[source,console] --------------------------------------------------- -GET /my-index-000001/_termvectors -{ - "doc" : { - "fullname" : "John Doe", - "text" : "test test test" - } -} --------------------------------------------------- -// TEST[continued] - -[[docs-termvectors-per-field-analyzer]] -====== Per-field analyzer - -Additionally, a different analyzer than the one at the field may be provided -by using the `per_field_analyzer` parameter. This is useful in order to -generate term vectors in any fashion, especially when using artificial -documents. When providing an analyzer for a field that already stores term -vectors, the term vectors will be re-generated. 
- -[source,console] --------------------------------------------------- -GET /my-index-000001/_termvectors -{ - "doc" : { - "fullname" : "John Doe", - "text" : "test test test" - }, - "fields": ["fullname"], - "per_field_analyzer" : { - "fullname": "keyword" - } -} --------------------------------------------------- -// TEST[continued] - -Response: - -[source,console-result] --------------------------------------------------- -{ - "_index": "my-index-000001", - "_version": 0, - "found": true, - "took": 6, - "term_vectors": { - "fullname": { - "field_statistics": { - "sum_doc_freq": 2, - "doc_count": 4, - "sum_ttf": 4 - }, - "terms": { - "John Doe": { - "term_freq": 1, - "tokens": [ - { - "position": 0, - "start_offset": 0, - "end_offset": 8 - } - ] - } - } - } - } -} --------------------------------------------------- -// TEST[continued] -// TESTRESPONSE[s/"took": 6/"took": "$body.took"/] -// TESTRESPONSE[s/"sum_doc_freq": 2/"sum_doc_freq": "$body.term_vectors.fullname.field_statistics.sum_doc_freq"/] -// TESTRESPONSE[s/"doc_count": 4/"doc_count": "$body.term_vectors.fullname.field_statistics.doc_count"/] -// TESTRESPONSE[s/"sum_ttf": 4/"sum_ttf": "$body.term_vectors.fullname.field_statistics.sum_ttf"/] - - -[[docs-termvectors-terms-filtering]] -===== Terms filtering - -Finally, the terms returned could be filtered based on their tf-idf scores. In -the example below we obtain the three most "interesting" keywords from the -artificial document having the given "plot" field value. Notice -that the keyword "Tony" or any stop words are not part of the response, as -their tf-idf must be too low. - -[source,console] --------------------------------------------------- -GET /imdb/_termvectors -{ - "doc": { - "plot": "When wealthy industrialist Tony Stark is forced to build an armored suit after a life-threatening incident, he ultimately decides to use its technology to fight against evil." - }, - "term_statistics": true, - "field_statistics": true, - "positions": false, - "offsets": false, - "filter": { - "max_num_terms": 3, - "min_term_freq": 1, - "min_doc_freq": 1 - } -} --------------------------------------------------- -// TEST[skip:no imdb test index] - -Response: - -[source,console-result] --------------------------------------------------- -{ - "_index": "imdb", - "_version": 0, - "found": true, - "term_vectors": { - "plot": { - "field_statistics": { - "sum_doc_freq": 3384269, - "doc_count": 176214, - "sum_ttf": 3753460 - }, - "terms": { - "armored": { - "doc_freq": 27, - "ttf": 27, - "term_freq": 1, - "score": 9.74725 - }, - "industrialist": { - "doc_freq": 88, - "ttf": 88, - "term_freq": 1, - "score": 8.590818 - }, - "stark": { - "doc_freq": 44, - "ttf": 47, - "term_freq": 1, - "score": 9.272792 - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/docs/update-by-query.asciidoc b/docs/reference/docs/update-by-query.asciidoc deleted file mode 100644 index c8d68082c8ea1..0000000000000 --- a/docs/reference/docs/update-by-query.asciidoc +++ /dev/null @@ -1,806 +0,0 @@ -[[docs-update-by-query]] -=== Update By Query API -++++ -Update by query -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-document[Document APIs]. --- - -Updates documents that match the specified query. -If no query is specified, performs an update on every document in the data stream or index without -modifying the source, which is useful for picking up mapping changes. 
- -[source,console] --------------------------------------------------- -POST my-index-000001/_update_by_query?conflicts=proceed --------------------------------------------------- -// TEST[setup:my_index_big] - -//// - -[source,console-result] --------------------------------------------------- -{ - "took" : 147, - "timed_out": false, - "updated": 120, - "deleted": 0, - "batches": 1, - "version_conflicts": 0, - "noops": 0, - "retries": { - "bulk": 0, - "search": 0 - }, - "throttled_millis": 0, - "requests_per_second": -1.0, - "throttled_until_millis": 0, - "total": 120, - "failures" : [ ] -} --------------------------------------------------- -// TESTRESPONSE[s/"took" : 147/"took" : "$body.took"/] - -//// - -[[docs-update-by-query-api-request]] -==== {api-request-title} - -`POST //_update_by_query` - -[[docs-update-by-query-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the following -<> for the target data stream, index, -or alias: - -** `read` -** `index` or `write` - -[[docs-update-by-query-api-desc]] -==== {api-description-title} - -You can specify the query criteria in the request URI or the request body -using the same syntax as the <>. - -When you submit an update by query request, {es} gets a snapshot of the data stream or index -when it begins processing the request and updates matching documents using -`internal` versioning. -When the versions match, the document is updated and the version number is incremented. -If a document changes between the time that the snapshot is taken and -the update operation is processed, it results in a version conflict and the operation fails. -You can opt to count version conflicts instead of halting and returning by -setting `conflicts` to `proceed`. Note that if you opt to count -version conflicts the operation could attempt to update more documents from the source than -`max_docs` until it has successfully updated `max_docs` documents, or it has gone through every document -in the source query. - -NOTE: Documents with a version equal to 0 cannot be updated using update by -query because `internal` versioning does not support 0 as a valid -version number. - -While processing an update by query request, {es} performs multiple search -requests sequentially to find all of the matching documents. -A bulk update request is performed for each batch of matching documents. -Any query or update failures cause the update by query request to fail and -the failures are shown in the response. -Any update requests that completed successfully still stick, they are not rolled back. - -===== Refreshing shards - -Specifying the `refresh` parameter refreshes all shards once the request completes. -This is different than the update API's `refresh` parameter, which causes just the shard -that received the request to be refreshed. Unlike the update API, it does not support -`wait_for`. - -[[docs-update-by-query-task-api]] -===== Running update by query asynchronously - -If the request contains `wait_for_completion=false`, {es} -performs some preflight checks, launches the request, and returns a -<> you can use to cancel or get the status of the task. -{es} creates a record of this task as a document at `.tasks/task/${taskId}`. - -===== Waiting for active shards - -`wait_for_active_shards` controls how many copies of a shard must be active -before proceeding with the request. See <> -for details. `timeout` controls how long each write request waits for unavailable -shards to become available. 
Both work exactly the way they work in the -<>. Update by query uses scrolled searches, so you can also -specify the `scroll` parameter to control how long it keeps the search context -alive, for example `?scroll=10m`. The default is 5 minutes. - -===== Throttling update requests - -To control the rate at which update by query issues batches of update operations, -you can set `requests_per_second` to any positive decimal number. This pads each -batch with a wait time to throttle the rate. Set `requests_per_second` to `-1` -to disable throttling. - -Throttling uses a wait time between batches so that the internal scroll requests -can be given a timeout that takes the request padding into account. The padding -time is the difference between the batch size divided by the -`requests_per_second` and the time spent writing. By default the batch size is -`1000`, so if `requests_per_second` is set to `500`: - -[source,txt] --------------------------------------------------- -target_time = 1000 / 500 per second = 2 seconds -wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds --------------------------------------------------- - -Since the batch is issued as a single `_bulk` request, large batch sizes -cause {es} to create many requests and wait before starting the next set. -This is "bursty" instead of "smooth". - -[[docs-update-by-query-slice]] -===== Slicing - -Update by query supports <> to parallelize the -update process. This can improve efficiency and provide a -convenient way to break the request down into smaller parts. - -Setting `slices` to `auto` chooses a reasonable number for most data streams and indices. -If you're slicing manually or otherwise tuning automatic slicing, keep in mind -that: - -* Query performance is most efficient when the number of `slices` is equal to -the number of shards in the index or backing index. If that number is large (for example, -500), choose a lower number as too many `slices` hurts performance. Setting -`slices` higher than the number of shards generally does not improve efficiency -and adds overhead. - -* Update performance scales linearly across available resources with the -number of slices. - -Whether query or update performance dominates the runtime depends on the -documents being reindexed and cluster resources. - -[[docs-update-by-query-api-path-params]] -==== {api-path-parms-title} - -``:: -(Optional, string) Comma-separated list of data streams, indices, and aliases to -search. Supports wildcards (`*`). To search all data streams or indices, omit -this parameter or use `*` or `_all`. - -[[docs-update-by-query-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=allow-no-indices] -+ -Defaults to `true`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=analyzer] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=analyze_wildcard] - -`conflicts`:: - (Optional, string) What to do if update by query hits version conflicts: - `abort` or `proceed`. Defaults to `abort`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=default_operator] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=df] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=expand-wildcards] -+ -Defaults to `open`. 
- -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=index-ignore-unavailable] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=lenient] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=max_docs] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=pipeline] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=preference] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=search-q] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=request_cache] - -`refresh`:: -(Optional, Boolean) -If `true`, {es} refreshes affected shards to make the operation visible to -search. Defaults to `false`. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=requests_per_second] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=routing] - -`scroll`:: -(Optional, <>) -Period to retain the <> for scrolling. See -<>. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=scroll_size] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=search_type] - -`search_timeout`:: -(Optional, <>) -Explicit timeout for each search request. -Defaults to no timeout. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=slices] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=sort] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=stats] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=terminate_after] - -`timeout`:: -+ --- -(Optional, <>) -Period each update request waits for the following operations: - -* Dynamic mapping updates -* <> - -Defaults to `1m` (one minute). This guarantees {es} waits for at least the -timeout before failing. The actual wait time could be longer, particularly when -multiple waits occur. --- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=version] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=wait_for_active_shards] - -[[docs-update-by-query-api-request-body]] -==== {api-request-body-title} - -`query`:: - (Optional, <>) Specifies the documents to update - using the <>. - - -[[docs-update-by-query-api-response-body]] -==== Response body - -`took`:: -The number of milliseconds from start to end of the whole operation. - -`timed_out`:: -This flag is set to `true` if any of the requests executed during the -update by query execution has timed out. - -`total`:: -The number of documents that were successfully processed. - -`updated`:: -The number of documents that were successfully updated. - -`deleted`:: -The number of documents that were successfully deleted. - -`batches`:: -The number of scroll responses pulled back by the update by query. - -`version_conflicts`:: -The number of version conflicts that the update by query hit. - -`noops`:: -The number of documents that were ignored because the script used for -the update by query returned a `noop` value for `ctx.op`. - -`retries`:: -The number of retries attempted by update by query. `bulk` is the number of bulk -actions retried, and `search` is the number of search actions retried. - -`throttled_millis`:: -Number of milliseconds the request slept to conform to `requests_per_second`. - -`requests_per_second`:: -The number of requests per second effectively executed during the update by query. - -`throttled_until_millis`:: -This field should always be equal to zero in an `_update_by_query` response. It only -has meaning when using the <>, where it -indicates the next time (in milliseconds since epoch) a throttled request will be -executed again in order to conform to `requests_per_second`. 
- -`failures`:: -Array of failures if there were any unrecoverable errors during the process. If -this is non-empty then the request aborted because of those failures. -Update by query is implemented using batches. Any failure causes the entire -process to abort, but all failures in the current batch are collected into the -array. You can use the `conflicts` option to prevent reindex from aborting on -version conflicts. - -[[docs-update-by-query-api-example]] -==== {api-examples-title} - -The simplest usage of `_update_by_query` just performs an update on every -document in the data stream or index without changing the source. This is useful to -<> or some other online -mapping change. - -To update selected documents, specify a query in the request body: - -[source,console] --------------------------------------------------- -POST my-index-000001/_update_by_query?conflicts=proceed -{ - "query": { <1> - "term": { - "user.id": "kimchy" - } - } -} --------------------------------------------------- -// TEST[setup:my_index] - -<1> The query must be passed as a value to the `query` key, in the same -way as the <>. You can also use the `q` -parameter in the same way as the search API. - -Update documents in multiple data streams or indices: - -[source,console] --------------------------------------------------- -POST my-index-000001,my-index-000002/_update_by_query --------------------------------------------------- -// TEST[s/^/PUT my-index-000001\nPUT my-index-000002\n/] - -Limit the update by query operation to shards that a particular routing value: - -[source,console] --------------------------------------------------- -POST my-index-000001/_update_by_query?routing=1 --------------------------------------------------- -// TEST[setup:my_index] - -By default update by query uses scroll batches of 1000. -You can change the batch size with the `scroll_size` parameter: - -[source,console] --------------------------------------------------- -POST my-index-000001/_update_by_query?scroll_size=100 --------------------------------------------------- -// TEST[setup:my_index] - -Update a document using a unique attribute: - -[source,console] --------------------------------------------------- -POST my-index-000001/_update_by_query -{ - "query": { - "term": { - "user.id": "kimchy" - } - }, - "max_docs": 1 -} --------------------------------------------------- -// TEST[setup:my_index] - -[[docs-update-by-query-api-source]] -===== Update the document source - -Update by query supports scripts to update the document source. -For example, the following request increments the `count` field for all -documents with a `user.id` of `kimchy` in `my-index-000001`: - -//// -[source,console] ----- -PUT my-index-000001/_create/1 -{ - "user": { - "id": "kimchy" - }, - "count": 1 -} ----- -//// - -[source,console] --------------------------------------------------- -POST my-index-000001/_update_by_query -{ - "script": { - "source": "ctx._source.count++", - "lang": "painless" - }, - "query": { - "term": { - "user.id": "kimchy" - } - } -} --------------------------------------------------- -// TEST[continued] - -Note that `conflicts=proceed` is not specified in this example. In this case, a -version conflict should halt the process so you can handle the failure. - -As with the <>, you can set `ctx.op` to change the -operation that is performed: - -[horizontal] -`noop`:: -Set `ctx.op = "noop"` if your script decides that it doesn't have to make any changes. 
-The update by query operation skips updating the document and increments the `noop` counter. - -`delete`:: -Set `ctx.op = "delete"` if your script decides that the document should be deleted. -The update by query operation deletes the document and increments the `deleted` counter. - -Update by query only supports `index`, `noop`, and `delete`. -Setting `ctx.op` to anything else is an error. Setting any other field in `ctx` is an error. -This API only enables you to modify the source of matching documents, you cannot move them. - -[[docs-update-by-query-api-ingest-pipeline]] -===== Update documents using an ingest pipeline - -Update by query can use the <> feature by specifying a `pipeline`: - -[source,console] --------------------------------------------------- -PUT _ingest/pipeline/set-foo -{ - "description" : "sets foo", - "processors" : [ { - "set" : { - "field": "foo", - "value": "bar" - } - } ] -} -POST my-index-000001/_update_by_query?pipeline=set-foo --------------------------------------------------- -// TEST[setup:my_index] - - -[discrete] -[[docs-update-by-query-fetch-tasks]] -===== Get the status of update by query operations - -You can fetch the status of all running update by query requests with the -<>: - -[source,console] --------------------------------------------------- -GET _tasks?detailed=true&actions=*byquery --------------------------------------------------- -// TEST[skip:No tasks to retrieve] - -The responses looks like: - -[source,console-result] --------------------------------------------------- -{ - "nodes" : { - "r1A2WoRbTwKZ516z6NEs5A" : { - "name" : "r1A2WoR", - "transport_address" : "127.0.0.1:9300", - "host" : "127.0.0.1", - "ip" : "127.0.0.1:9300", - "attributes" : { - "testattr" : "test", - "portsfile" : "true" - }, - "tasks" : { - "r1A2WoRbTwKZ516z6NEs5A:36619" : { - "node" : "r1A2WoRbTwKZ516z6NEs5A", - "id" : 36619, - "type" : "transport", - "action" : "indices:data/write/update/byquery", - "status" : { <1> - "total" : 6154, - "updated" : 3500, - "created" : 0, - "deleted" : 0, - "batches" : 4, - "version_conflicts" : 0, - "noops" : 0, - "retries": { - "bulk": 0, - "search": 0 - }, - "throttled_millis": 0 - }, - "description" : "" - } - } - } - } -} --------------------------------------------------- - -<1> This object contains the actual status. It is just like the response JSON -with the important addition of the `total` field. `total` is the total number -of operations that the reindex expects to perform. You can estimate the -progress by adding the `updated`, `created`, and `deleted` fields. The request -will finish when their sum is equal to the `total` field. - -With the task id you can look up the task directly. The following example -retrieves information about task `r1A2WoRbTwKZ516z6NEs5A:36619`: - -[source,console] --------------------------------------------------- -GET /_tasks/r1A2WoRbTwKZ516z6NEs5A:36619 --------------------------------------------------- -// TEST[catch:missing] - -The advantage of this API is that it integrates with `wait_for_completion=false` -to transparently return the status of completed tasks. If the task is completed -and `wait_for_completion=false` was set on it, then it'll come back with a -`results` or an `error` field. The cost of this feature is the document that -`wait_for_completion=false` creates at `.tasks/task/${taskId}`. It is up to -you to delete that document. 
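
Putting this together, a typical asynchronous flow is sketched below: start the update by
query with `wait_for_completion=false`, then poll the returned task. The task ID shown is the
placeholder used elsewhere on this page:

[source,console]
--------------------------------------------------
POST my-index-000001/_update_by_query?conflicts=proceed&wait_for_completion=false
--------------------------------------------------
// TEST[skip:illustrative example]

[source,console]
--------------------------------------------------
GET /_tasks/r1A2WoRbTwKZ516z6NEs5A:36619
--------------------------------------------------
// TEST[catch:missing]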
- - -[discrete] -[[docs-update-by-query-cancel-task-api]] -===== Cancel an update by query operation - -Any update by query can be cancelled using the <>: - -[source,console] --------------------------------------------------- -POST _tasks/r1A2WoRbTwKZ516z6NEs5A:36619/_cancel --------------------------------------------------- - -The task ID can be found using the <>. - -Cancellation should happen quickly but might take a few seconds. The task status -API above will continue to list the update by query task until this task checks -that it has been cancelled and terminates itself. - - -[discrete] -[[docs-update-by-query-rethrottle]] -===== Change throttling for a request - -The value of `requests_per_second` can be changed on a running update by query -using the `_rethrottle` API: - -[source,console] --------------------------------------------------- -POST _update_by_query/r1A2WoRbTwKZ516z6NEs5A:36619/_rethrottle?requests_per_second=-1 --------------------------------------------------- - -The task ID can be found using the <>. - -Just like when setting it on the `_update_by_query` API, `requests_per_second` -can be either `-1` to disable throttling or any decimal number -like `1.7` or `12` to throttle to that level. Rethrottling that speeds up the -query takes effect immediately, but rethrotting that slows down the query will -take effect after completing the current batch. This prevents scroll -timeouts. - -[discrete] -[[docs-update-by-query-manual-slice]] -===== Slice manually -Slice an update by query manually by providing a slice id and total number of -slices to each request: - -[source,console] ----------------------------------------------------------------- -POST my-index-000001/_update_by_query -{ - "slice": { - "id": 0, - "max": 2 - }, - "script": { - "source": "ctx._source['extra'] = 'test'" - } -} -POST my-index-000001/_update_by_query -{ - "slice": { - "id": 1, - "max": 2 - }, - "script": { - "source": "ctx._source['extra'] = 'test'" - } -} ----------------------------------------------------------------- -// TEST[setup:my_index_big] - -Which you can verify works with: - -[source,console] ----------------------------------------------------------------- -GET _refresh -POST my-index-000001/_search?size=0&q=extra:test&filter_path=hits.total ----------------------------------------------------------------- -// TEST[continued] - -Which results in a sensible `total` like this one: - -[source,console-result] ----------------------------------------------------------------- -{ - "hits": { - "total": { - "value": 120, - "relation": "eq" - } - } -} ----------------------------------------------------------------- - -[discrete] -[[docs-update-by-query-automatic-slice]] -===== Use automatic slicing - -You can also let update by query automatically parallelize using -<> to slice on `_id`. 
Use `slices` to specify the number of -slices to use: - -[source,console] ----------------------------------------------------------------- -POST my-index-000001/_update_by_query?refresh&slices=5 -{ - "script": { - "source": "ctx._source['extra'] = 'test'" - } -} ----------------------------------------------------------------- -// TEST[setup:my_index_big] - -Which you also can verify works with: - -[source,console] ----------------------------------------------------------------- -POST my-index-000001/_search?size=0&q=extra:test&filter_path=hits.total ----------------------------------------------------------------- -// TEST[continued] - -Which results in a sensible `total` like this one: - -[source,console-result] ----------------------------------------------------------------- -{ - "hits": { - "total": { - "value": 120, - "relation": "eq" - } - } -} ----------------------------------------------------------------- - -Setting `slices` to `auto` will let Elasticsearch choose the number of slices -to use. This setting will use one slice per shard, up to a certain limit. If -there are multiple source data streams or indices, it will choose the number of slices based -on the index or backing index with the smallest number of shards. - -Adding `slices` to `_update_by_query` just automates the manual process used in -the section above, creating sub-requests which means it has some quirks: - -* You can see these requests in the -<>. These sub-requests are "child" -tasks of the task for the request with `slices`. -* Fetching the status of the task for the request with `slices` only contains -the status of completed slices. -* These sub-requests are individually addressable for things like cancellation -and rethrottling. -* Rethrottling the request with `slices` will rethrottle the unfinished -sub-request proportionally. -* Canceling the request with `slices` will cancel each sub-request. -* Due to the nature of `slices` each sub-request won't get a perfectly even -portion of the documents. All documents will be addressed, but some slices may -be larger than others. Expect larger slices to have a more even distribution. -* Parameters like `requests_per_second` and `max_docs` on a request with -`slices` are distributed proportionally to each sub-request. Combine that with -the point above about distribution being uneven and you should conclude that -using `max_docs` with `slices` might not result in exactly `max_docs` documents -being updated. -* Each sub-request gets a slightly different snapshot of the source data stream or index -though these are all taken at approximately the same time. - -[discrete] -[[picking-up-a-new-property]] -===== Pick up a new property - -Say you created an index without dynamic mapping, filled it with data, and then -added a mapping value to pick up more fields from the data: - -[source,console] --------------------------------------------------- -PUT test -{ - "mappings": { - "dynamic": false, <1> - "properties": { - "text": {"type": "text"} - } - } -} - -POST test/_doc?refresh -{ - "text": "words words", - "flag": "bar" -} -POST test/_doc?refresh -{ - "text": "words words", - "flag": "foo" -} -PUT test/_mapping <2> -{ - "properties": { - "text": {"type": "text"}, - "flag": {"type": "text", "analyzer": "keyword"} - } -} --------------------------------------------------- - -<1> This means that new fields won't be indexed, just stored in `_source`. - -<2> This updates the mapping to add the new `flag` field. 
To pick up the new -field you have to reindex all documents with it. - -Searching for the data won't find anything: - -[source,console] --------------------------------------------------- -POST test/_search?filter_path=hits.total -{ - "query": { - "match": { - "flag": "foo" - } - } -} --------------------------------------------------- -// TEST[continued] - -[source,console-result] --------------------------------------------------- -{ - "hits" : { - "total": { - "value": 0, - "relation": "eq" - } - } -} --------------------------------------------------- - -But you can issue an `_update_by_query` request to pick up the new mapping: - -[source,console] --------------------------------------------------- -POST test/_update_by_query?refresh&conflicts=proceed -POST test/_search?filter_path=hits.total -{ - "query": { - "match": { - "flag": "foo" - } - } -} --------------------------------------------------- -// TEST[continued] - -[source,console-result] --------------------------------------------------- -{ - "hits" : { - "total": { - "value": 1, - "relation": "eq" - } - } -} --------------------------------------------------- - -You can do the exact same thing when adding a field to a multifield. diff --git a/docs/reference/docs/update.asciidoc b/docs/reference/docs/update.asciidoc deleted file mode 100644 index 62201f5748b7d..0000000000000 --- a/docs/reference/docs/update.asciidoc +++ /dev/null @@ -1,377 +0,0 @@ -[[docs-update]] -=== Update API -++++ -Update -++++ - -.New API reference -[sidebar] --- -For the most up-to-date API details, refer to {api-es}/group/endpoint-document[Document APIs]. --- - -Updates a document using the specified script. - -[[docs-update-api-request]] -==== {api-request-title} - -`POST //_update/<_id>` - -[[docs-update-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the `index` or -`write` <> for the target index or -index alias. - -[[update-api-desc]] -==== {api-description-title} - -Enables you to script document updates. The script can update, delete, or skip -modifying the document. The update API also supports passing a partial document, -which is merged into the existing document. To fully replace an existing -document, use the <>. - -This operation: - -. Gets the document (collocated with the shard) from the index. -. Runs the specified script. -. Indexes the result. - -The document must still be reindexed, but using `update` removes some network -roundtrips and reduces chances of version conflicts between the GET and the -index operation. - -The `_source` field must be enabled to use `update`. In addition to `_source`, -you can access the following variables through the `ctx` map: `_index`, -`_type`, `_id`, `_version`, `_routing`, and `_now` (the current timestamp). - -[[docs-update-api-path-params]] -==== {api-path-parms-title} - -``:: -(Required, string) Name of the target index. By default, the index is created -automatically if it doesn't exist. For more information, see <>. - -`<_id>`:: -(Required, string) Unique identifier for the document to be updated. - -[[docs-update-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=if_seq_no] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=if_primary_term] - -`lang`:: -(Optional, string) The script language. Default: `painless`. 
- -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=require-alias] - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=refresh] - -`retry_on_conflict`:: -(Optional, integer) Specify how many times should the operation be retried when - a conflict occurs. Default: 0. - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=routing] - -`_source`:: -(Optional, list) Set to `true` to enable source retrieval (default: `false`). -You can also specify a comma-separated list of the fields you want to retrieve. - -`_source_excludes`:: -(Optional, list) Specify the source fields you want to exclude. - -`_source_includes`:: -(Optional, list) Specify the source fields you want to retrieve. - -`timeout`:: -+ --- -(Optional, <>) -Period to wait for the following operations: - -* <> updates -* <> - -Defaults to `1m` (one minute). This guarantees {es} waits for at least the -timeout before failing. The actual wait time could be longer, particularly when -multiple waits occur. --- - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=wait_for_active_shards] - -[[update-api-example]] -==== {api-examples-title} - -First, let's index a simple doc: - -[source,console] ----- -PUT test/_doc/1 -{ - "counter" : 1, - "tags" : ["red"] -} ----- -// TESTSETUP - -To increment the counter, you can submit an update request with the -following script: - -[source,console] ----- -POST test/_update/1 -{ - "script" : { - "source": "ctx._source.counter += params.count", - "lang": "painless", - "params" : { - "count" : 4 - } - } -} ----- - -Similarly, you could use and update script to add a tag to the list of tags -(this is just a list, so the tag is added even it exists): - -[source,console] ----- -POST test/_update/1 -{ - "script": { - "source": "ctx._source.tags.add(params.tag)", - "lang": "painless", - "params": { - "tag": "blue" - } - } -} ----- - -You could also remove a tag from the list of tags. The Painless -function to `remove` a tag takes the array index of the element -you want to remove. To avoid a possible runtime error, you first need to -make sure the tag exists. If the list contains duplicates of the tag, this -script just removes one occurrence. - -[source,console] ----- -POST test/_update/1 -{ - "script": { - "source": "if (ctx._source.tags.contains(params.tag)) { ctx._source.tags.remove(ctx._source.tags.indexOf(params.tag)) }", - "lang": "painless", - "params": { - "tag": "blue" - } - } -} ----- - -You can also add and remove fields from a document. For example, this script -adds the field `new_field`: - -[source,console] ----- -POST test/_update/1 -{ - "script" : "ctx._source.new_field = 'value_of_new_field'" -} ----- - -Conversely, this script removes the field `new_field`: - -[source,console] ----- -POST test/_update/1 -{ - "script" : "ctx._source.remove('new_field')" -} ----- -// TEST[continued] - -The following script removes a subfield from an object field: - -//// -[source,console] ----- -PUT test/_doc/1?refresh -{ - "my-object": { - "my-subfield": true - } -} ----- -//// - -[source,console] ----- -POST test/_update/1 -{ - "script": "ctx._source['my-object'].remove('my-subfield')" -} ----- -// TEST[continued] - -Instead of updating the document, you can also change the operation that is -executed from within the script. 
For example, this request deletes the doc if -the `tags` field contains `green`, otherwise it does nothing (`noop`): - -[source,console] ----- -POST test/_update/1 -{ - "script": { - "source": "if (ctx._source.tags.contains(params.tag)) { ctx.op = 'delete' } else { ctx.op = 'noop' }", - "lang": "painless", - "params": { - "tag": "green" - } - } -} ----- - -[discrete] -===== Update part of a document - -The following partial update adds a new field to the -existing document: - -[source,console] ----- -POST test/_update/1 -{ - "doc": { - "name": "new_name" - } -} ----- - -If both `doc` and `script` are specified, then `doc` is ignored. If you -specify a scripted update, include the fields you want to update in the script. - -[discrete] -===== Detect noop updates - -By default updates that don't change anything detect that they don't change -anything and return `"result": "noop"`: - -[source,console] ----- -POST test/_update/1 -{ - "doc": { - "name": "new_name" - } -} ----- -// TEST[continued] - -If the value of `name` is already `new_name`, the update -request is ignored and the `result` element in the response returns `noop`: - -[source,console-result] ----- -{ - "_shards": { - "total": 0, - "successful": 0, - "failed": 0 - }, - "_index": "test", - "_id": "1", - "_version": 2, - "_primary_term": 1, - "_seq_no": 1, - "result": "noop" -} ----- - -You can disable this behavior by setting `"detect_noop": false`: - -[source,console] ----- -POST test/_update/1 -{ - "doc": { - "name": "new_name" - }, - "detect_noop": false -} ----- - -[[upserts]] -[discrete] -===== Upsert - -An upsert operation lets you update an existing document or insert a new one if it doesn't exist, in a single request. - -In this example, if the product with ID `1` exists, its price will be updated to `100`. If the product does not exist, a new document with ID `1` and a price of `50` will be inserted. - -[source,console] ----- -POST /test/_update/1 -{ - "doc": { - "product_price": 100 - }, - "upsert": { - "product_price": 50 - } -} ----- - -[discrete] -[[scripted_upsert]] -===== Scripted upsert - -To run the script whether or not the document exists, set `scripted_upsert` to -`true`: - -[source,console] ----- -POST test/_update/1 -{ - "scripted_upsert": true, - "script": { - "source": """ - if ( ctx.op == 'create' ) { - ctx._source.counter = params.count - } else { - ctx._source.counter += params.count - } - """, - "params": { - "count": 4 - } - }, - "upsert": {} -} ----- - -[discrete] -[[doc_as_upsert]] -===== Doc as upsert - -Instead of sending a partial `doc` plus an `upsert` doc, you can set -`doc_as_upsert` to `true` to use the contents of `doc` as the `upsert` -value: - -[source,console] ----- -POST test/_update/1 -{ - "doc": { - "name": "new_name" - }, - "doc_as_upsert": true -} ----- - -[NOTE] -==== -Using <> with `doc_as_upsert` is not supported. 
-==== diff --git a/docs/reference/elasticsearch-plugins/_other_command_line_parameters.md b/docs/reference/elasticsearch-plugins/_other_command_line_parameters.md new file mode 100644 index 0000000000000..8bb72f05f5bc4 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/_other_command_line_parameters.md @@ -0,0 +1,62 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/_other_command_line_parameters.html +--- + +# Other command line parameters [_other_command_line_parameters] + +The `plugin` scripts supports a number of other command line parameters: + + +## Silent/verbose mode [_silentverbose_mode] + +The `--verbose` parameter outputs more debug information, while the `--silent` parameter turns off all output including the progress bar. The script may return the following exit codes: + +`0` +: everything was OK + +`64` +: unknown command or incorrect option parameter + +`74` +: IO error + +`70` +: any other error + + +## Batch mode [_batch_mode] + +Certain plugins require more privileges than those provided by default in core Elasticsearch. These plugins will list the required privileges and ask the user for confirmation before continuing with installation. + +When running the plugin install script from another program (e.g. install automation scripts), the plugin script should detect that it is not being called from the console and skip the confirmation response, automatically granting all requested permissions. If console detection fails, then batch mode can be forced by specifying `-b` or `--batch` as follows: + +```shell +sudo bin/elasticsearch-plugin install --batch [pluginname] +``` + + +## Custom config directory [_custom_config_directory] + +If your `elasticsearch.yml` config file is in a custom location, you will need to specify the path to the config file when using the `plugin` script. 
You can do this as follows: + +```sh +sudo ES_PATH_CONF=/path/to/conf/dir bin/elasticsearch-plugin install +``` + + +## Proxy settings [_proxy_settings] + +To install a plugin via a proxy, you can add the proxy details to the `CLI_JAVA_OPTS` environment variable with the Java settings `http.proxyHost` and `http.proxyPort` (or `https.proxyHost` and `https.proxyPort`): + +```shell +sudo CLI_JAVA_OPTS="-Dhttp.proxyHost=host_name -Dhttp.proxyPort=port_number -Dhttps.proxyHost=host_name -Dhttps.proxyPort=https_port_number" bin/elasticsearch-plugin install analysis-icu +``` + +Or on Windows: + +```shell +set CLI_JAVA_OPTS="-Dhttp.proxyHost=host_name -Dhttp.proxyPort=port_number -Dhttps.proxyHost=host_name -Dhttps.proxyPort=https_port_number" +bin\elasticsearch-plugin install analysis-icu +``` + diff --git a/docs/reference/elasticsearch-plugins/_plugins_directory.md b/docs/reference/elasticsearch-plugins/_plugins_directory.md new file mode 100644 index 0000000000000..9df6b313944da --- /dev/null +++ b/docs/reference/elasticsearch-plugins/_plugins_directory.md @@ -0,0 +1,14 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/_plugins_directory.html +--- + +# Plugins directory [_plugins_directory] + +The default location of the `plugins` directory depends on which package you install: + +* [Directory layout of `.tar.gz` archives](docs-content://deploy-manage/deploy/self-managed/install-elasticsearch-from-archive-on-linux-macos.md#targz-layout) +* [Directory layout of Windows `.zip` archives](docs-content://deploy-manage/deploy/self-managed/install-elasticsearch-with-zip-on-windows.md#windows-layout) +* [Directory layout of Debian package](docs-content://deploy-manage/deploy/self-managed/install-elasticsearch-with-debian-package.md#deb-layout) +* [Directory layout of RPM](docs-content://deploy-manage/deploy/self-managed/install-elasticsearch-with-rpm.md#rpm-layout) + diff --git a/docs/reference/elasticsearch-plugins/_reimplementing_and_extending_the_analyzers.md b/docs/reference/elasticsearch-plugins/_reimplementing_and_extending_the_analyzers.md new file mode 100644 index 0000000000000..8c4d1c19be17f --- /dev/null +++ b/docs/reference/elasticsearch-plugins/_reimplementing_and_extending_the_analyzers.md @@ -0,0 +1,28 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/_reimplementing_and_extending_the_analyzers.html +--- + +# Reimplementing and extending the analyzers [_reimplementing_and_extending_the_analyzers] + +The `smartcn` analyzer could be reimplemented as a `custom` analyzer that can then be extended and configured as follows: + +```console +PUT smartcn_example +{ + "settings": { + "analysis": { + "analyzer": { + "rebuilt_smartcn": { + "tokenizer": "smartcn_tokenizer", + "filter": [ + "porter_stem", + "smartcn_stop" + ] + } + } + } + } +} +``` + diff --git a/docs/reference/elasticsearch-plugins/_reimplementing_and_extending_the_analyzers_2.md b/docs/reference/elasticsearch-plugins/_reimplementing_and_extending_the_analyzers_2.md new file mode 100644 index 0000000000000..a3f996bf3fc58 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/_reimplementing_and_extending_the_analyzers_2.md @@ -0,0 +1,29 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/_reimplementing_and_extending_the_analyzers_2.html +--- + +# Reimplementing and extending the analyzers [_reimplementing_and_extending_the_analyzers_2] + +The `polish` analyzer could be reimplemented as a `custom` analyzer that can 
then be extended and configured differently as follows: + +```console +PUT /stempel_example +{ + "settings": { + "analysis": { + "analyzer": { + "rebuilt_stempel": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "polish_stop", + "polish_stem" + ] + } + } + } + } +} +``` + diff --git a/docs/reference/elasticsearch-plugins/analysis-icu-analyzer.md b/docs/reference/elasticsearch-plugins/analysis-icu-analyzer.md new file mode 100644 index 0000000000000..ce026722c821a --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-icu-analyzer.md @@ -0,0 +1,17 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-analyzer.html +--- + +# ICU analyzer [analysis-icu-analyzer] + +The `icu_analyzer` analyzer performs basic normalization, tokenization and character folding, using the `icu_normalizer` char filter, `icu_tokenizer` and `icu_folding` token filter + +The following parameters are accepted: + +`method` +: Normalization method. Accepts `nfkc`, `nfc` or `nfkc_cf` (default) + +`mode` +: Normalization mode. Accepts `compose` (default) or `decompose`. + diff --git a/docs/reference/elasticsearch-plugins/analysis-icu-collation-keyword-field.md b/docs/reference/elasticsearch-plugins/analysis-icu-collation-keyword-field.md new file mode 100644 index 0000000000000..add91856d3ce7 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-icu-collation-keyword-field.md @@ -0,0 +1,102 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-collation-keyword-field.html +--- + +# ICU collation keyword field [analysis-icu-collation-keyword-field] + +Collations are used for sorting documents in a language-specific word order. The `icu_collation_keyword` field type is available to all indices and will encode the terms directly as bytes in a doc values field and a single indexed token just like a standard [Keyword Field](/reference/elasticsearch/mapping-reference/keyword.md). + +Defaults to using [DUCET collation](https://www.elastic.co/guide/en/elasticsearch/guide/2.x/sorting-collations.html#uca), which is a best-effort attempt at language-neutral sorting. + +Below is an example of how to set up a field for sorting German names in phonebook order: + +```console +PUT my-index-000001 +{ + "mappings": { + "properties": { + "name": { <1> + "type": "text", + "fields": { + "sort": { <2> + "type": "icu_collation_keyword", + "index": false, + "language": "de", + "country": "DE", + "variant": "@collation=phonebook" + } + } + } + } + } +} + +GET /my-index-000001/_search <3> +{ + "query": { + "match": { + "name": "Fritz" + } + }, + "sort": "name.sort" +} +``` + +1. The `name` field uses the `standard` analyzer, and so supports full text queries. +2. The `name.sort` field is an `icu_collation_keyword` field that will preserve the name as a single token doc values, and applies the German phonebook order. +3. An example query which searches the `name` field and sorts on the `name.sort` field. + + +## Parameters for ICU collation keyword fields [_parameters_for_icu_collation_keyword_fields] + +The following parameters are accepted by `icu_collation_keyword` fields: + +`doc_values` +: Should the field be stored on disk in a column-stride fashion, so that it can later be used for sorting, aggregations, or scripting? Accepts `true` (default) or `false`. + +`index` +: Should the field be searchable? Accepts `true` (default) or `false`. 
+ +`null_value` +: Accepts a string value which is substituted for any explicit `null` values. Defaults to `null`, which means the field is treated as missing. + +[`ignore_above`](/reference/elasticsearch/mapping-reference/ignore-above.md) +: Strings longer than the `ignore_above` setting will be ignored. Checking is performed on the original string before the collation. The `ignore_above` setting can be updated on existing fields using the [PUT mapping API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-mapping). By default, there is no limit and all values will be indexed. + +`store` +: Whether the field value should be stored and retrievable separately from the [`_source`](/reference/elasticsearch/mapping-reference/mapping-source-field.md) field. Accepts `true` or `false` (default). + +`fields` +: Multi-fields allow the same string value to be indexed in multiple ways for different purposes, such as one field for search and a multi-field for sorting and aggregations. + + +## Collation options [_collation_options] + +`strength` +: The strength property determines the minimum level of difference considered significant during comparison. Possible values are : `primary`, `secondary`, `tertiary`, `quaternary` or `identical`. See the [ICU Collation documentation](https://icu-project.org/apiref/icu4j/com/ibm/icu/text/Collator.md) for a more detailed explanation for each value. Defaults to `tertiary` unless otherwise specified in the collation. + +`decomposition` +: Possible values: `no` (default, but collation-dependent) or `canonical`. Setting this decomposition property to `canonical` allows the Collator to handle unnormalized text properly, producing the same results as if the text were normalized. If `no` is set, it is the user’s responsibility to ensure that all text is already in the appropriate form before a comparison or before getting a CollationKey. Adjusting decomposition mode allows the user to select between faster and more complete collation behavior. Since a great many of the world’s languages do not require text normalization, most locales set `no` as the default decomposition mode. + +The following options are expert only: + +`alternate` +: Possible values: `shifted` or `non-ignorable`. Sets the alternate handling for strength `quaternary` to be either shifted or non-ignorable. Which boils down to ignoring punctuation and whitespace. + +`case_level` +: Possible values: `true` or `false` (default). Whether case level sorting is required. When strength is set to `primary` this will ignore accent differences. + +`case_first` +: Possible values: `lower` or `upper`. Useful to control which case is sorted first when the case is not ignored for strength `tertiary`. The default depends on the collation. + +`numeric` +: Possible values: `true` or `false` (default) . Whether digits are sorted according to their numeric representation. For example the value `egg-9` is sorted before the value `egg-21`. + +`variable_top` +: Single character or contraction. Controls what is variable for `alternate`. + +`hiragana_quaternary_mode` +: Possible values: `true` or `false`. Distinguishing between Katakana and Hiragana characters in `quaternary` strength. 
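To illustrate how the options above are applied, here is a hedged sketch (the index and field names are invented) that enables numeric collation on a sort field so that `egg-9` sorts before `egg-21`:

```console
PUT collation-options-example
{
  "mappings": {
    "properties": {
      "label": {
        "type": "text",
        "fields": {
          "sort": { <1>
            "type": "icu_collation_keyword",
            "index": false,
            "numeric": true
          }
        }
      }
    }
  }
}

GET collation-options-example/_search <2>
{
  "sort": "label.sort"
}
```

1. The collation options are set as ordinary mapping parameters on the `icu_collation_keyword` field; here `numeric` is enabled, and `strength`, `case_first`, and so on could be added in the same way.
2. Sorting on `label.sort` should then order `egg-9` before `egg-21`, because digits are compared by their numeric value.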
+ + diff --git a/docs/reference/elasticsearch-plugins/analysis-icu-collation.md b/docs/reference/elasticsearch-plugins/analysis-icu-collation.md new file mode 100644 index 0000000000000..0d68e446bdebf --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-icu-collation.md @@ -0,0 +1,13 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-collation.html +--- + +# ICU collation token filter [analysis-icu-collation] + +::::{warning} +This token filter has been deprecated since Lucene 5.0. Please use [ICU Collation Keyword Field](/reference/elasticsearch-plugins/analysis-icu-collation-keyword-field.md). + +:::: + + diff --git a/docs/reference/elasticsearch-plugins/analysis-icu-folding.md b/docs/reference/elasticsearch-plugins/analysis-icu-folding.md new file mode 100644 index 0000000000000..4227f4902683b --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-icu-folding.md @@ -0,0 +1,62 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-folding.html +--- + +# ICU folding token filter [analysis-icu-folding] + +Case folding of Unicode characters based on `UTR#30`, like the [ASCII-folding token filter](/reference/data-analysis/text-analysis/analysis-asciifolding-tokenfilter.md) on steroids. It registers itself as the `icu_folding` token filter and is available to all indices: + +```console +PUT icu_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "folded": { + "tokenizer": "icu_tokenizer", + "filter": [ + "icu_folding" + ] + } + } + } + } + } +} +``` + +The ICU folding token filter already does Unicode normalization, so there is no need to use Normalize character or token filter as well. + +Which letters are folded can be controlled by specifying the `unicode_set_filter` parameter, which accepts a [UnicodeSet](https://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.md). + +The following example exempts Swedish characters from folding. It is important to note that both upper and lowercase forms should be specified, and that these filtered character are not lowercased which is why we add the `lowercase` filter as well: + +```console +PUT icu_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "swedish_analyzer": { + "tokenizer": "icu_tokenizer", + "filter": [ + "swedish_folding", + "lowercase" + ] + } + }, + "filter": { + "swedish_folding": { + "type": "icu_folding", + "unicode_set_filter": "[^åäöÅÄÖ]" + } + } + } + } + } +} +``` + diff --git a/docs/reference/elasticsearch-plugins/analysis-icu-normalization-charfilter.md b/docs/reference/elasticsearch-plugins/analysis-icu-normalization-charfilter.md new file mode 100644 index 0000000000000..4c05664d6ef7a --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-icu-normalization-charfilter.md @@ -0,0 +1,50 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-normalization-charfilter.html +--- + +# ICU normalization character filter [analysis-icu-normalization-charfilter] + +Normalizes characters as explained [here](https://unicode-org.github.io/icu/userguide/transforms/normalization/). It registers itself as the `icu_normalizer` character filter, which is available to all indices without any further configuration. The type of normalization can be specified with the `name` parameter, which accepts `nfc`, `nfkc`, and `nfkc_cf` (default). 
Set the `mode` parameter to `decompose` to convert `nfc` to `nfd` or `nfkc` to `nfkd` respectively: + +Which letters are normalized can be controlled by specifying the `unicode_set_filter` parameter, which accepts a [UnicodeSet](https://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.md). + +Here are two examples, the default usage and a customised character filter: + +```console +PUT icu_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "nfkc_cf_normalized": { <1> + "tokenizer": "icu_tokenizer", + "char_filter": [ + "icu_normalizer" + ] + }, + "nfd_normalized": { <2> + "tokenizer": "icu_tokenizer", + "char_filter": [ + "nfd_normalizer" + ] + } + }, + "char_filter": { + "nfd_normalizer": { + "type": "icu_normalizer", + "name": "nfc", + "mode": "decompose" + } + } + } + } + } +} +``` + +1. Uses the default `nfkc_cf` normalization. +2. Uses the customized `nfd_normalizer` token filter, which is set to use `nfc` normalization with decomposition. + + diff --git a/docs/reference/elasticsearch-plugins/analysis-icu-normalization.md b/docs/reference/elasticsearch-plugins/analysis-icu-normalization.md new file mode 100644 index 0000000000000..414bcfc1521c8 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-icu-normalization.md @@ -0,0 +1,51 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-normalization.html +--- + +# ICU normalization token filter [analysis-icu-normalization] + +Normalizes characters as explained [here](https://unicode-org.github.io/icu/userguide/transforms/normalization/). It registers itself as the `icu_normalizer` token filter, which is available to all indices without any further configuration. The type of normalization can be specified with the `name` parameter, which accepts `nfc`, `nfkc`, and `nfkc_cf` (default). + +Which letters are normalized can be controlled by specifying the `unicode_set_filter` parameter, which accepts a [UnicodeSet](https://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.md). + +You should probably prefer the [Normalization character filter](/reference/elasticsearch-plugins/analysis-icu-normalization-charfilter.md). + +Here are two examples, the default usage and a customised token filter: + +```console +PUT icu_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "nfkc_cf_normalized": { <1> + "tokenizer": "icu_tokenizer", + "filter": [ + "icu_normalizer" + ] + }, + "nfc_normalized": { <2> + "tokenizer": "icu_tokenizer", + "filter": [ + "nfc_normalizer" + ] + } + }, + "filter": { + "nfc_normalizer": { + "type": "icu_normalizer", + "name": "nfc" + } + } + } + } + } +} +``` + +1. Uses the default `nfkc_cf` normalization. +2. Uses the customized `nfc_normalizer` token filter, which is set to use `nfc` normalization. + + diff --git a/docs/reference/elasticsearch-plugins/analysis-icu-tokenizer.md b/docs/reference/elasticsearch-plugins/analysis-icu-tokenizer.md new file mode 100644 index 0000000000000..7aeb05334fbd5 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-icu-tokenizer.md @@ -0,0 +1,92 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-tokenizer.html +--- + +# ICU tokenizer [analysis-icu-tokenizer] + +Tokenizes text into words on word boundaries, as defined in [UAX #29: Unicode Text Segmentation](https://www.unicode.org/reports/tr29/). 
It behaves much like the [`standard` tokenizer](/reference/data-analysis/text-analysis/analysis-standard-tokenizer.md), but adds better support for some Asian languages by using a dictionary-based approach to identify words in Thai, Lao, Chinese, Japanese, and Korean, and using custom rules to break Myanmar and Khmer text into syllables. + +```console +PUT icu_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_icu_analyzer": { + "tokenizer": "icu_tokenizer" + } + } + } + } + } +} +``` + +## Rules customization [_rules_customization] + +::::{warning} +This functionality is marked as experimental in Lucene +:::: + + +You can customize the `icu-tokenizer` behavior by specifying per-script rule files, see the [RBBI rules syntax reference](http://userguide.icu-project.org/boundaryanalysis#TOC-RBBI-Rules) for a more detailed explanation. + +To add icu tokenizer rules, set the `rule_files` settings, which should contain a comma-separated list of `code:rulefile` pairs in the following format: [four-letter ISO 15924 script code](https://unicode.org/iso15924/iso15924-codes.md), followed by a colon, then a rule file name. Rule files are placed `ES_HOME/config` directory. + +As a demonstration of how the rule files can be used, save the following user file to `$ES_HOME/config/KeywordTokenizer.rbbi`: + +```text +.+ {200}; +``` + +Then create an analyzer to use this rule file as follows: + +```console +PUT icu_sample +{ + "settings": { + "index": { + "analysis": { + "tokenizer": { + "icu_user_file": { + "type": "icu_tokenizer", + "rule_files": "Latn:KeywordTokenizer.rbbi" + } + }, + "analyzer": { + "my_analyzer": { + "type": "custom", + "tokenizer": "icu_user_file" + } + } + } + } + } +} + +GET icu_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "Elasticsearch. Wow!" +} +``` + +The above `analyze` request returns the following: + +```console-result +{ + "tokens": [ + { + "token": "Elasticsearch. Wow!", + "start_offset": 0, + "end_offset": 19, + "type": "", + "position": 0 + } + ] +} +``` + + diff --git a/docs/reference/elasticsearch-plugins/analysis-icu-transform.md b/docs/reference/elasticsearch-plugins/analysis-icu-transform.md new file mode 100644 index 0000000000000..2beae8ae4b3fe --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-icu-transform.md @@ -0,0 +1,65 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-transform.html +--- + +# ICU transform token filter [analysis-icu-transform] + +Transforms are used to process Unicode text in many different ways, such as case mapping, normalization, transliteration and bidirectional text handling. + +You can define which transformation you want to apply with the `id` parameter (defaults to `Null`), and specify text direction with the `dir` parameter which accepts `forward` (default) for LTR and `reverse` for RTL. Custom rulesets are not yet supported. + +For example: + +```console +PUT icu_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "latin": { + "tokenizer": "keyword", + "filter": [ + "myLatinTransform" + ] + } + }, + "filter": { + "myLatinTransform": { + "type": "icu_transform", + "id": "Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC" <1> + } + } + } + } + } +} + +GET icu_sample/_analyze +{ + "analyzer": "latin", + "text": "你好" <2> +} + +GET icu_sample/_analyze +{ + "analyzer": "latin", + "text": "здравствуйте" <3> +} + +GET icu_sample/_analyze +{ + "analyzer": "latin", + "text": "こんにちは" <4> +} +``` + +1. 
This transforms transliterates characters to Latin, and separates accents from their base characters, removes the accents, and then puts the remaining text into an unaccented form. +2. Returns `ni hao`. +3. Returns `zdravstvujte`. +4. Returns `kon'nichiha`. + + +For more documentation, Please see the [user guide of ICU Transform](https://unicode-org.github.io/icu/userguide/transforms/). + diff --git a/docs/reference/elasticsearch-plugins/analysis-icu.md b/docs/reference/elasticsearch-plugins/analysis-icu.md new file mode 100644 index 0000000000000..cf2ab06edfa21 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-icu.md @@ -0,0 +1,56 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu.html +--- + +# ICU analysis plugin [analysis-icu] + +The ICU Analysis plugin integrates the Lucene ICU module into {{es}}, adding extended Unicode support using the [ICU](https://icu.unicode.org/) libraries, including better analysis of Asian languages, Unicode normalization, Unicode-aware case folding, collation support, and transliteration. + +::::{admonition} ICU analysis and backwards compatibility +:class: important + +From time to time, the ICU library receives updates such as adding new characters and emojis, and improving collation (sort) orders. These changes may or may not affect search and sort orders, depending on which characters sets you are using. + +While we restrict ICU upgrades to major versions, you may find that an index created in the previous major version will need to be reindexed in order to return correct (and correctly ordered) results, and to take advantage of new characters. + +:::: + + + +## Installation [analysis-icu-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install analysis-icu +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-icu/analysis-icu-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-icu/analysis-icu-9.0.0-beta1.zip). To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-icu/analysis-icu-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-icu/analysis-icu-9.0.0-beta1.zip.asc). + + +## Removal [analysis-icu-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove analysis-icu +``` + +The node must be stopped before removing the plugin. 
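Whether you are installing or removing the plugin, a quick way to confirm the state of a node is to list its installed plugins. This is not part of the original page, just a hedged suggestion:

```sh
sudo bin/elasticsearch-plugin list
```

Across a running cluster, the cat plugins API (`GET _cat/plugins?v`) should report the same information for every node.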
+ + + + + + + + + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji-analyzer.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji-analyzer.md new file mode 100644 index 0000000000000..8a085497d739f --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji-analyzer.md @@ -0,0 +1,62 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji-analyzer.html +--- + +# kuromoji analyzer [analysis-kuromoji-analyzer] + +The `kuromoji` analyzer uses the following analysis chain: + +* `CJKWidthCharFilter` from Lucene +* [`kuromoji_tokenizer`](/reference/elasticsearch-plugins/analysis-kuromoji-tokenizer.md) +* [`kuromoji_baseform`](/reference/elasticsearch-plugins/analysis-kuromoji-baseform.md) token filter +* [`kuromoji_part_of_speech`](/reference/elasticsearch-plugins/analysis-kuromoji-speech.md) token filter +* [`ja_stop`](/reference/elasticsearch-plugins/analysis-kuromoji-stop.md) token filter +* [`kuromoji_stemmer`](/reference/elasticsearch-plugins/analysis-kuromoji-stemmer.md) token filter +* [`lowercase`](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md) token filter + +It supports the `mode` and `user_dictionary` settings from [`kuromoji_tokenizer`](/reference/elasticsearch-plugins/analysis-kuromoji-tokenizer.md). + + +## Normalize full-width characters [kuromoji-analyzer-normalize-full-width-characters] + +The `kuromoji_tokenizer` tokenizer uses characters from the MeCab-IPADIC dictionary to split text into tokens. The dictionary includes some full-width characters, such as `o` and `f`. If a text contains full-width characters, the tokenizer can produce unexpected tokens. + +For example, the `kuromoji_tokenizer` tokenizer converts the text `Culture of Japan` to the tokens `[ culture, o, f, japan ]` instead of `[ culture, of, japan ]`. + +To avoid this, add the [`icu_normalizer` character filter](/reference/elasticsearch-plugins/analysis-icu-normalization-charfilter.md) to a custom analyzer based on the `kuromoji` analyzer. The `icu_normalizer` character filter converts full-width characters to their normal equivalents. + +First, duplicate the `kuromoji` analyzer to create the basis for a custom analyzer. Then add the `icu_normalizer` character filter to the custom analyzer. For example: + +```console +PUT index-00001 +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "kuromoji_normalize": { <1> + "char_filter": [ + "icu_normalizer" <2> + ], + "tokenizer": "kuromoji_tokenizer", + "filter": [ + "kuromoji_baseform", + "kuromoji_part_of_speech", + "cjk_width", + "ja_stop", + "kuromoji_stemmer", + "lowercase" + ] + } + } + } + } + } +} +``` + +1. Creates a new custom analyzer, `kuromoji_normalize`, based on the `kuromoji` analyzer. +2. Adds the `icu_normalizer` character filter to the analyzer. + + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji-baseform.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji-baseform.md new file mode 100644 index 0000000000000..56bad65eca796 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji-baseform.md @@ -0,0 +1,49 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji-baseform.html +--- + +# kuromoji_baseform token filter [analysis-kuromoji-baseform] + +The `kuromoji_baseform` token filter replaces terms with their BaseFormAttribute. This acts as a lemmatizer for verbs and adjectives. 
Example: + +```console +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ + "kuromoji_baseform" + ] + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "飲み" +} +``` + +which responds with: + +```console-result +{ + "tokens" : [ { + "token" : "飲む", + "start_offset" : 0, + "end_offset" : 2, + "type" : "word", + "position" : 0 + } ] +} +``` + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji-charfilter.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji-charfilter.md new file mode 100644 index 0000000000000..71f4999482ad4 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji-charfilter.md @@ -0,0 +1,15 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji-charfilter.html +--- + +# kuromoji_iteration_mark character filter [analysis-kuromoji-charfilter] + +The `kuromoji_iteration_mark` normalizes Japanese horizontal iteration marks (*odoriji*) to their expanded form. It accepts the following settings: + +`normalize_kanji` +: Indicates whether kanji iteration marks should be normalized. Defaults to `true`. + +`normalize_kana` +: Indicates whether kana iteration marks should be normalized. Defaults to `true` + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji-completion.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji-completion.md new file mode 100644 index 0000000000000..0d0860c8d8928 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji-completion.md @@ -0,0 +1,34 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji-completion.html +--- + +# kuromoji_completion token filter [analysis-kuromoji-completion] + +The `kuromoji_completion` token filter adds Japanese romanized tokens to the term attributes along with the original tokens (surface forms). + +```console +GET _analyze +{ + "analyzer": "kuromoji_completion", + "text": "寿司" <1> +} +``` + +1. Returns `寿司`, `susi` (Kunrei-shiki) and `sushi` (Hepburn-shiki). + + +The `kuromoji_completion` token filter accepts the following settings: + +`mode` +: The tokenization mode determines how the tokenizer handles compound and unknown words. It can be set to: + +`index` +: Simple romanization. Expected to be used when indexing. + +`query` +: Input Method aware romanization. Expected to be used when querying. + +Defaults to `index`. + + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji-hiragana-uppercase.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji-hiragana-uppercase.md new file mode 100644 index 0000000000000..18043427ad54e --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji-hiragana-uppercase.md @@ -0,0 +1,67 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji-hiragana-uppercase.html +--- + +# hiragana_uppercase token filter [analysis-kuromoji-hiragana-uppercase] + +The `hiragana_uppercase` token filter normalizes small letters (捨て仮名) in hiragana into standard letters. This filter is useful if you want to search against old style Japanese text such as patents, legal documents, contract policies, etc. 
+ +For example: + +```console +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ + "hiragana_uppercase" + ] + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "ちょっとまって" +} +``` + +Which results in: + +```console-result +{ + "tokens": [ + { + "token": "ちよつと", + "start_offset": 0, + "end_offset": 4, + "type": "word", + "position": 0 + }, + { + "token": "まつ", + "start_offset": 4, + "end_offset": 6, + "type": "word", + "position": 1 + }, + { + "token": "て", + "start_offset": 6, + "end_offset": 7, + "type": "word", + "position": 2 + } + ] +} +``` + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji-katakana-uppercase.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji-katakana-uppercase.md new file mode 100644 index 0000000000000..74e3c293264cf --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji-katakana-uppercase.md @@ -0,0 +1,53 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji-katakana-uppercase.html +--- + +# katakana_uppercase token filter [analysis-kuromoji-katakana-uppercase] + +The `katakana_uppercase` token filter normalizes small letters (捨て仮名) in katakana into standard letters. This filter is useful if you want to search against old style Japanese text such as patents, legal documents, contract policies, etc. + +For example: + +```console +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ + "katakana_uppercase" + ] + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "ストップウォッチ" +} +``` + +Which results in: + +```console-result +{ + "tokens": [ + { + "token": "ストツプウオツチ", + "start_offset": 0, + "end_offset": 8, + "type": "word", + "position": 0 + } + ] +} +``` + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji-number.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji-number.md new file mode 100644 index 0000000000000..349a350d8d224 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji-number.md @@ -0,0 +1,49 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji-number.html +--- + +# kuromoji_number token filter [analysis-kuromoji-number] + +The `kuromoji_number` token filter normalizes Japanese numbers (kansūji) to regular Arabic decimal numbers in half-width characters. 
For example: + +```console +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ + "kuromoji_number" + ] + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "一〇〇〇" +} +``` + +Which results in: + +```console-result +{ + "tokens" : [ { + "token" : "1000", + "start_offset" : 0, + "end_offset" : 4, + "type" : "word", + "position" : 0 + } ] +} +``` + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji-readingform.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji-readingform.md new file mode 100644 index 0000000000000..4afdadffabe6a --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji-readingform.md @@ -0,0 +1,62 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji-readingform.html +--- + +# kuromoji_readingform token filter [analysis-kuromoji-readingform] + +The `kuromoji_readingform` token filter replaces the token with its reading form in either katakana or romaji. It accepts the following setting: + +`use_romaji` +: Whether romaji reading form should be output instead of katakana. Defaults to `false`. + +When using the pre-defined `kuromoji_readingform` filter, `use_romaji` is set to `true`. The default when defining a custom `kuromoji_readingform`, however, is `false`. The only reason to use the custom form is if you need the katakana reading form: + +```console +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "romaji_analyzer": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ "romaji_readingform" ] + }, + "katakana_analyzer": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ "katakana_readingform" ] + } + }, + "filter": { + "romaji_readingform": { + "type": "kuromoji_readingform", + "use_romaji": true + }, + "katakana_readingform": { + "type": "kuromoji_readingform", + "use_romaji": false + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "katakana_analyzer", + "text": "寿司" <1> +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "romaji_analyzer", + "text": "寿司" <2> +} +``` + +1. Returns `スシ`. +2. Returns `sushi`. + + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji-speech.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji-speech.md new file mode 100644 index 0000000000000..1d1e2bb1e76f4 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji-speech.md @@ -0,0 +1,69 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji-speech.html +--- + +# kuromoji_part_of_speech token filter [analysis-kuromoji-speech] + +The `kuromoji_part_of_speech` token filter removes tokens that match a set of part-of-speech tags. It accepts the following setting: + +`stoptags` +: An array of part-of-speech tags that should be removed. It defaults to the `stoptags.txt` file embedded in the `lucene-analyzer-kuromoji.jar`. 
+ +For example: + +```console +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ + "my_posfilter" + ] + } + }, + "filter": { + "my_posfilter": { + "type": "kuromoji_part_of_speech", + "stoptags": [ + "助詞-格助詞-一般", + "助詞-終助詞" + ] + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "寿司がおいしいね" +} +``` + +Which responds with: + +```console-result +{ + "tokens" : [ { + "token" : "寿司", + "start_offset" : 0, + "end_offset" : 2, + "type" : "word", + "position" : 0 + }, { + "token" : "おいしい", + "start_offset" : 3, + "end_offset" : 7, + "type" : "word", + "position" : 2 + } ] +} +``` + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji-stemmer.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji-stemmer.md new file mode 100644 index 0000000000000..7cfec9f5f4c13 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji-stemmer.md @@ -0,0 +1,56 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji-stemmer.html +--- + +# kuromoji_stemmer token filter [analysis-kuromoji-stemmer] + +The `kuromoji_stemmer` token filter normalizes common katakana spelling variations ending in a long sound character by removing this character (U+30FC). Only full-width katakana characters are supported. + +This token filter accepts the following setting: + +`minimum_length` +: Katakana words shorter than the `minimum length` are not stemmed (default is `4`). + +```console +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ + "my_katakana_stemmer" + ] + } + }, + "filter": { + "my_katakana_stemmer": { + "type": "kuromoji_stemmer", + "minimum_length": 4 + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "コピー" <1> +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "サーバー" <2> +} +``` + +1. Returns `コピー`. +2. Return `サーバ`. + + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji-stop.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji-stop.md new file mode 100644 index 0000000000000..001839a8828cd --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji-stop.md @@ -0,0 +1,58 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji-stop.html +--- + +# ja_stop token filter [analysis-kuromoji-stop] + +The `ja_stop` token filter filters out Japanese stopwords (`_japanese_`), and any other custom stopwords specified by the user. This filter only supports the predefined `_japanese_` stopwords list. If you want to use a different predefined list, then use the [`stop` token filter](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) instead. 
+ +```console +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "analyzer_with_ja_stop": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ + "ja_stop" + ] + } + }, + "filter": { + "ja_stop": { + "type": "ja_stop", + "stopwords": [ + "_japanese_", + "ストップ" + ] + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "analyzer_with_ja_stop", + "text": "ストップは消える" +} +``` + +The above request returns: + +```console-result +{ + "tokens" : [ { + "token" : "消える", + "start_offset" : 5, + "end_offset" : 8, + "type" : "word", + "position" : 2 + } ] +} +``` + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji-tokenizer.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji-tokenizer.md new file mode 100644 index 0000000000000..f2c9acc6cabdb --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji-tokenizer.md @@ -0,0 +1,165 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji-tokenizer.html +--- + +# kuromoji_tokenizer [analysis-kuromoji-tokenizer] + +The `kuromoji_tokenizer` accepts the following settings: + +`mode` +: The tokenization mode determines how the tokenizer handles compound and unknown words. It can be set to: + +`normal` +: Normal segmentation, no decomposition for compounds. Example output: + + ``` + 関西国際空港 + アブラカダブラ + ``` + + +`search` +: Segmentation geared towards search. This includes a decompounding process for long nouns, also including the full compound token as a synonym. Example output: + + ``` + 関西, 関西国際空港, 国際, 空港 + アブラカダブラ + ``` + + +`extended` +: Extended mode outputs unigrams for unknown words. Example output: + + ``` + 関西, 関西国際空港, 国際, 空港 + ア, ブ, ラ, カ, ダ, ブ, ラ + ``` + + + +`discard_punctuation` +: Whether punctuation should be discarded from the output. Defaults to `true`. + +`lenient` +: Whether the `user_dictionary` should be deduplicated on the provided `text`. False by default causing duplicates to generate an error. + +`user_dictionary` +: The Kuromoji tokenizer uses the MeCab-IPADIC dictionary by default. A `user_dictionary` may be appended to the default dictionary. The dictionary should have the following CSV format: + +```text +, ... , ... , +``` + + +As a demonstration of how the user dictionary can be used, save the following dictionary to `$ES_HOME/config/userdict_ja.txt`: + +```text +東京スカイツリー,東京 スカイツリー,トウキョウ スカイツリー,カスタム名詞 +``` + +You can also inline the rules directly in the tokenizer definition using the `user_dictionary_rules` option: + +```console +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "tokenizer": { + "kuromoji_user_dict": { + "type": "kuromoji_tokenizer", + "mode": "extended", + "user_dictionary_rules": ["東京スカイツリー,東京 スカイツリー,トウキョウ スカイツリー,カスタム名詞"] + } + }, + "analyzer": { + "my_analyzer": { + "type": "custom", + "tokenizer": "kuromoji_user_dict" + } + } + } + } + } +} +``` + +`nbest_cost`/`nbest_examples` +: Additional expert user parameters `nbest_cost` and `nbest_examples` can be used to include additional tokens that are most likely according to the statistical model. If both parameters are used, the largest number of both is applied. + +`nbest_cost` +: The `nbest_cost` parameter specifies an additional Viterbi cost. The KuromojiTokenizer will include all tokens in Viterbi paths that are within the nbest_cost value of the best path. + +`nbest_examples` +: The `nbest_examples` can be used to find a `nbest_cost` value based on examples. 
For example, a value of /箱根山-箱根/成田空港-成田/ indicates that in the texts, 箱根山 (Mt. Hakone) and 成田空港 (Narita Airport) we’d like a cost that gives is us 箱根 (Hakone) and 成田 (Narita). + + +Then create an analyzer as follows: + +```console +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "tokenizer": { + "kuromoji_user_dict": { + "type": "kuromoji_tokenizer", + "mode": "extended", + "discard_punctuation": "false", + "user_dictionary": "userdict_ja.txt", + "lenient": "true" + } + }, + "analyzer": { + "my_analyzer": { + "type": "custom", + "tokenizer": "kuromoji_user_dict" + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "東京スカイツリー" +} +``` + +The above `analyze` request returns the following: + +```console-result +{ + "tokens" : [ { + "token" : "東京", + "start_offset" : 0, + "end_offset" : 2, + "type" : "word", + "position" : 0 + }, { + "token" : "スカイツリー", + "start_offset" : 2, + "end_offset" : 8, + "type" : "word", + "position" : 1 + } ] +} +``` + +`discard_compound_token` +: Whether original compound tokens should be discarded from the output with `search` mode. Defaults to `false`. Example output with `search` or `extended` mode and this option `true`: + + ``` + 関西, 国際, 空港 + ``` + + +::::{note} +If a text contains full-width characters, the `kuromoji_tokenizer` tokenizer can produce unexpected tokens. To avoid this, add the [`icu_normalizer` character filter](/reference/elasticsearch-plugins/analysis-icu-normalization-charfilter.md) to your analyzer. See [Normalize full-width characters](/reference/elasticsearch-plugins/analysis-kuromoji-analyzer.md#kuromoji-analyzer-normalize-full-width-characters). +:::: + + diff --git a/docs/reference/elasticsearch-plugins/analysis-kuromoji.md b/docs/reference/elasticsearch-plugins/analysis-kuromoji.md new file mode 100644 index 0000000000000..73442b4fc44a9 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-kuromoji.md @@ -0,0 +1,50 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-kuromoji.html +--- + +# Japanese (kuromoji) analysis plugin [analysis-kuromoji] + +The Japanese (kuromoji) analysis plugin integrates Lucene kuromoji analysis module into {{es}}. + + +## Installation [analysis-kuromoji-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install analysis-kuromoji +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-kuromoji/analysis-kuromoji-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-kuromoji/analysis-kuromoji-9.0.0-beta1.zip). To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-kuromoji/analysis-kuromoji-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-kuromoji/analysis-kuromoji-9.0.0-beta1.zip.asc). 
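+
+For example, assuming the archive and its checksum file have been downloaded into the current directory (the path and file names below are only illustrative), the download could be verified and then installed from the local file:
+
+```sh
+# Verify the archive against the published SHA-512 checksum file
+shasum -a 512 -c analysis-kuromoji-9.0.0-beta1.zip.sha512
+
+# Install the plugin from the local file instead of downloading it again
+sudo bin/elasticsearch-plugin install file:///path/to/analysis-kuromoji-9.0.0-beta1.zip
+```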
+ + +## Removal [analysis-kuromoji-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove analysis-kuromoji +``` + +The node must be stopped before removing the plugin. + + + + + + + + + + + + + diff --git a/docs/reference/elasticsearch-plugins/analysis-nori-analyzer.md b/docs/reference/elasticsearch-plugins/analysis-nori-analyzer.md new file mode 100644 index 0000000000000..9f7f788dfe4db --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-nori-analyzer.md @@ -0,0 +1,16 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-nori-analyzer.html +--- + +# nori analyzer [analysis-nori-analyzer] + +The `nori` analyzer consists of the following tokenizer and token filters: + +* [`nori_tokenizer`](/reference/elasticsearch-plugins/analysis-nori-tokenizer.md) +* [`nori_part_of_speech`](/reference/elasticsearch-plugins/analysis-nori-speech.md) token filter +* [`nori_readingform`](/reference/elasticsearch-plugins/analysis-nori-readingform.md) token filter +* [`lowercase`](/reference/data-analysis/text-analysis/analysis-lowercase-tokenfilter.md) token filter + +It supports the `decompound_mode` and `user_dictionary` settings from [`nori_tokenizer`](/reference/elasticsearch-plugins/analysis-nori-tokenizer.md) and the `stoptags` setting from [`nori_part_of_speech`](/reference/elasticsearch-plugins/analysis-nori-speech.md). + diff --git a/docs/reference/elasticsearch-plugins/analysis-nori-number.md b/docs/reference/elasticsearch-plugins/analysis-nori-number.md new file mode 100644 index 0000000000000..6cfd9c6586a91 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-nori-number.md @@ -0,0 +1,96 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-nori-number.html +--- + +# nori_number token filter [analysis-nori-number] + +The `nori_number` token filter normalizes Korean numbers to regular Arabic decimal numbers in half-width characters. + +Korean numbers are often written using a combination of Hangul and Arabic numbers with various kinds of punctuation. For example, 3.2천 means 3200. This filter does this kind of normalization and allows a search for 3200 to match 3.2천 in text, but can also be used to make range facets based on the normalized numbers and so on. + +::::{note} +Notice that this analyzer uses a token composition scheme and relies on punctuation tokens being found in the token stream. Please make sure your `nori_tokenizer` has `discard_punctuation` set to false. In case punctuation characters, such as U+FF0E(.), is removed from the token stream, this filter would find input tokens 3 and 2천 and give outputs 3 and 2000 instead of 3200, which is likely not the intended result. + +If you want to remove punctuation characters from your index that are not part of normalized numbers, add a `stop` token filter with the punctuation you wish to remove after `nori_number` in your analyzer chain. + +:::: + + +Below are some examples of normalizations this filter supports. The input is untokenized text and the result is the single term attribute emitted for the input. 
+ +* 영영칠 → 7 +* 일영영영 → 1000 +* 삼천2백2십삼 → 3223 +* 일조육백만오천일 → 1000006005001 +* 3.2천 → 3200 +* 1.2만345.67 → 12345.67 +* 4,647.100 → 4647.1 +* 15,7 → 157 (be aware of this weakness) + +For example: + +```console +PUT nori_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "tokenizer_discard_puncuation_false", + "filter": [ + "part_of_speech_stop_sp", "nori_number" + ] + } + }, + "tokenizer": { + "tokenizer_discard_puncuation_false": { + "type": "nori_tokenizer", + "discard_punctuation": "false" + } + }, + "filter": { + "part_of_speech_stop_sp": { + "type": "nori_part_of_speech", + "stoptags": ["SP"] + } + } + } + } + } +} + +GET nori_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "십만이천오백과 3.2천" +} +``` + +Which results in: + +```console-result +{ + "tokens" : [{ + "token" : "102500", + "start_offset" : 0, + "end_offset" : 6, + "type" : "word", + "position" : 0 + }, { + "token" : "과", + "start_offset" : 6, + "end_offset" : 7, + "type" : "word", + "position" : 1 + }, { + "token" : "3200", + "start_offset" : 8, + "end_offset" : 12, + "type" : "word", + "position" : 2 + }] +} +``` + diff --git a/docs/reference/elasticsearch-plugins/analysis-nori-readingform.md b/docs/reference/elasticsearch-plugins/analysis-nori-readingform.md new file mode 100644 index 0000000000000..4b588dd4cce8d --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-nori-readingform.md @@ -0,0 +1,53 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-nori-readingform.html +--- + +# nori_readingform token filter [analysis-nori-readingform] + +The `nori_readingform` token filter rewrites tokens written in Hanja to their Hangul form. + +```console +PUT nori_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "nori_tokenizer", + "filter": [ "nori_readingform" ] + } + } + } + } + } +} + +GET nori_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "鄕歌" <1> +} +``` + +1. A token written in Hanja: Hyangga + + +Which responds with: + +```console-result +{ + "tokens" : [ { + "token" : "향가", <1> + "start_offset" : 0, + "end_offset" : 2, + "type" : "word", + "position" : 0 + }] +} +``` + +1. The Hanja form is replaced by the Hangul translation. + + diff --git a/docs/reference/elasticsearch-plugins/analysis-nori-speech.md b/docs/reference/elasticsearch-plugins/analysis-nori-speech.md new file mode 100644 index 0000000000000..a1a2ee4ded8e6 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-nori-speech.md @@ -0,0 +1,88 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-nori-speech.html +--- + +# nori_part_of_speech token filter [analysis-nori-speech] + +The `nori_part_of_speech` token filter removes tokens that match a set of part-of-speech tags. The list of supported tags and their meanings can be found here: [Part of speech tags](https://lucene.apache.org/core/10_1_0/core/../analysis/nori/org/apache/lucene/analysis/ko/POS.Tag.md) + +It accepts the following setting: + +`stoptags` +: An array of part-of-speech tags that should be removed. 
+
+and defaults to:
+
+```js
+"stoptags": [
+    "E",
+    "IC",
+    "J",
+    "MAG", "MAJ", "MM",
+    "SP", "SSC", "SSO", "SC", "SE",
+    "XPN", "XSA", "XSN", "XSV",
+    "UNA", "NA", "VSV"
+]
+```
+
+For example:
+
+```console
+PUT nori_sample
+{
+  "settings": {
+    "index": {
+      "analysis": {
+        "analyzer": {
+          "my_analyzer": {
+            "tokenizer": "nori_tokenizer",
+            "filter": [
+              "my_posfilter"
+            ]
+          }
+        },
+        "filter": {
+          "my_posfilter": {
+            "type": "nori_part_of_speech",
+            "stoptags": [
+              "NR" <1>
+            ]
+          }
+        }
+      }
+    }
+  }
+}
+
+GET nori_sample/_analyze
+{
+  "analyzer": "my_analyzer",
+  "text": "여섯 용이" <2>
+}
+```
+
+1. Korean numerals should be removed (`NR`)
+2. Six dragons
+
+
+Which responds with:
+
+```console-result
+{
+  "tokens" : [ {
+    "token" : "용",
+    "start_offset" : 3,
+    "end_offset" : 4,
+    "type" : "word",
+    "position" : 1
+  }, {
+    "token" : "이",
+    "start_offset" : 4,
+    "end_offset" : 5,
+    "type" : "word",
+    "position" : 2
+  } ]
+}
+```
+
diff --git a/docs/reference/elasticsearch-plugins/analysis-nori-tokenizer.md b/docs/reference/elasticsearch-plugins/analysis-nori-tokenizer.md
new file mode 100644
index 0000000000000..e4b5526255c69
--- /dev/null
+++ b/docs/reference/elasticsearch-plugins/analysis-nori-tokenizer.md
@@ -0,0 +1,268 @@
+---
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-nori-tokenizer.html
+---
+
+# nori_tokenizer [analysis-nori-tokenizer]
+
+The `nori_tokenizer` accepts the following settings:
+
+`decompound_mode`
+: The decompound mode determines how the tokenizer handles compound tokens. It can be set to:
+
+`none`
+: No decomposition for compounds. Example output:
+
+    ```
+    가거도항
+    가곡역
+    ```
+
+
+`discard`
+: Decomposes compounds and discards the original form (**default**). Example output:
+
+    ```
+    가곡역 => 가곡, 역
+    ```
+
+
+`mixed`
+: Decomposes compounds and keeps the original form. Example output:
+
+    ```
+    가곡역 => 가곡역, 가곡, 역
+    ```
+
+
+
+`discard_punctuation`
+: Whether punctuation should be discarded from the output. Defaults to `true`.
+
+`lenient`
+: Whether the `user_dictionary` should be deduplicated on the provided `text`. Defaults to `false`, in which case duplicate entries generate an error.
+
+`user_dictionary`
+: The Nori tokenizer uses the [mecab-ko-dic dictionary](https://bitbucket.org/eunjeon/mecab-ko-dic) by default. A `user_dictionary` with custom nouns (`NNG`) may be appended to the default dictionary. The dictionary should have the following format:
+
+```txt
+<token> [<token 1> ... <token n>]
+```
+
+The first token is mandatory and represents the custom noun that should be added to the dictionary. For compound nouns the custom segmentation can be provided after the first token (`[<token 1> ... <token n>]`). The segmentation of the custom compound nouns is controlled by the `decompound_mode` setting.
+
+As a demonstration of how the user dictionary can be used, save the following dictionary to `$ES_HOME/config/userdict_ko.txt`:
+
+```txt
+c++ <1>
+C쁠쁠
+세종
+세종시 세종 시 <2>
+```
+
+1. A simple noun
+2. A compound noun (`세종시`) followed by its decomposition: `세종` and `시`.
+ + +Then create an analyzer as follows: + +```console +PUT nori_sample +{ + "settings": { + "index": { + "analysis": { + "tokenizer": { + "nori_user_dict": { + "type": "nori_tokenizer", + "decompound_mode": "mixed", + "discard_punctuation": "false", + "user_dictionary": "userdict_ko.txt", + "lenient": "true" + } + }, + "analyzer": { + "my_analyzer": { + "type": "custom", + "tokenizer": "nori_user_dict" + } + } + } + } + } +} + +GET nori_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "세종시" <1> +} +``` + +1. Sejong city + + +The above `analyze` request returns the following: + +```console-result +{ + "tokens" : [ { + "token" : "세종시", + "start_offset" : 0, + "end_offset" : 3, + "type" : "word", + "position" : 0, + "positionLength" : 2 <1> + }, { + "token" : "세종", + "start_offset" : 0, + "end_offset" : 2, + "type" : "word", + "position" : 0 + }, { + "token" : "시", + "start_offset" : 2, + "end_offset" : 3, + "type" : "word", + "position" : 1 + }] +} +``` + +1. This is a compound token that spans two positions (`mixed` mode). + + + +`user_dictionary_rules` +: You can also inline the rules directly in the tokenizer definition using the `user_dictionary_rules` option: + +```console +PUT nori_sample +{ + "settings": { + "index": { + "analysis": { + "tokenizer": { + "nori_user_dict": { + "type": "nori_tokenizer", + "decompound_mode": "mixed", + "user_dictionary_rules": ["c++", "C쁠쁠", "세종", "세종시 세종 시"] + } + }, + "analyzer": { + "my_analyzer": { + "type": "custom", + "tokenizer": "nori_user_dict" + } + } + } + } + } +} +``` + + +The `nori_tokenizer` sets a number of additional attributes per token that are used by token filters to modify the stream. You can view all these additional attributes with the following request: + +```console +GET _analyze +{ + "tokenizer": "nori_tokenizer", + "text": "뿌리가 깊은 나무는", <1> + "attributes" : ["posType", "leftPOS", "rightPOS", "morphemes", "reading"], + "explain": true +} +``` + +1. 
A tree with deep roots + + +Which responds with: + +```console-result +{ + "detail": { + "custom_analyzer": true, + "charfilters": [], + "tokenizer": { + "name": "nori_tokenizer", + "tokens": [ + { + "token": "뿌리", + "start_offset": 0, + "end_offset": 2, + "type": "word", + "position": 0, + "leftPOS": "NNG(General Noun)", + "morphemes": null, + "posType": "MORPHEME", + "reading": null, + "rightPOS": "NNG(General Noun)" + }, + { + "token": "가", + "start_offset": 2, + "end_offset": 3, + "type": "word", + "position": 1, + "leftPOS": "JKS(Subject case marker)", + "morphemes": null, + "posType": "MORPHEME", + "reading": null, + "rightPOS": "JKS(Subject case marker)" + }, + { + "token": "깊", + "start_offset": 4, + "end_offset": 5, + "type": "word", + "position": 2, + "leftPOS": "VA(Adjective)", + "morphemes": null, + "posType": "MORPHEME", + "reading": null, + "rightPOS": "VA(Adjective)" + }, + { + "token": "은", + "start_offset": 5, + "end_offset": 6, + "type": "word", + "position": 3, + "leftPOS": "ETM(Adnominal form transformative ending)", + "morphemes": null, + "posType": "MORPHEME", + "reading": null, + "rightPOS": "ETM(Adnominal form transformative ending)" + }, + { + "token": "나무", + "start_offset": 7, + "end_offset": 9, + "type": "word", + "position": 4, + "leftPOS": "NNG(General Noun)", + "morphemes": null, + "posType": "MORPHEME", + "reading": null, + "rightPOS": "NNG(General Noun)" + }, + { + "token": "는", + "start_offset": 9, + "end_offset": 10, + "type": "word", + "position": 5, + "leftPOS": "JX(Auxiliary postpositional particle)", + "morphemes": null, + "posType": "MORPHEME", + "reading": null, + "rightPOS": "JX(Auxiliary postpositional particle)" + } + ] + }, + "tokenfilters": [] + } +} +``` + diff --git a/docs/reference/elasticsearch-plugins/analysis-nori.md b/docs/reference/elasticsearch-plugins/analysis-nori.md new file mode 100644 index 0000000000000..3cc2e381af8cd --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-nori.md @@ -0,0 +1,43 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-nori.html +--- + +# Korean (nori) analysis plugin [analysis-nori] + +The Korean (nori) Analysis plugin integrates Lucene nori analysis module into elasticsearch. It uses the [mecab-ko-dic dictionary](https://bitbucket.org/eunjeon/mecab-ko-dic) to perform morphological analysis of Korean texts. + + +## Installation [analysis-nori-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install analysis-nori +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-nori/analysis-nori-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-nori/analysis-nori-9.0.0-beta1.zip). To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-nori/analysis-nori-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-nori/analysis-nori-9.0.0-beta1.zip.asc). 
+ + +## Removal [analysis-nori-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove analysis-nori +``` + +The node must be stopped before removing the plugin. + + + + + + diff --git a/docs/reference/elasticsearch-plugins/analysis-phonetic-token-filter.md b/docs/reference/elasticsearch-plugins/analysis-phonetic-token-filter.md new file mode 100644 index 0000000000000..af804e4e4baa2 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-phonetic-token-filter.md @@ -0,0 +1,76 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-phonetic-token-filter.html +--- + +# phonetic token filter [analysis-phonetic-token-filter] + +The `phonetic` token filter takes the following settings: + +`encoder` +: Which phonetic encoder to use. Accepts `metaphone` (default), `double_metaphone`, `soundex`, `refined_soundex`, `caverphone1`, `caverphone2`, `cologne`, `nysiis`, `koelnerphonetik`, `haasephonetik`, `beider_morse`, `daitch_mokotoff`. + +`replace` +: Whether or not the original token should be replaced by the phonetic token. Accepts `true` (default) and `false`. Not supported by `beider_morse` encoding. + +```console +PUT phonetic_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "my_metaphone" + ] + } + }, + "filter": { + "my_metaphone": { + "type": "phonetic", + "encoder": "metaphone", + "replace": false + } + } + } + } + } +} + +GET phonetic_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "Joe Bloggs" <1> +} +``` + +1. Returns: `J`, `joe`, `BLKS`, `bloggs` + + +It is important to note that `"replace": false` can lead to unexpected behavior since the original and the phonetically analyzed version are both kept at the same token position. Some queries handle these stacked tokens in special ways. For example, the fuzzy `match` query does not apply [fuzziness](/reference/elasticsearch/rest-apis/common-options.md#fuzziness) to stacked synonym tokens. This can lead to issues that are difficult to diagnose and reason about. For this reason, it is often beneficial to use separate fields for analysis with and without phonetic filtering. That way searches can be run against both fields with differing boosts and trade-offs (e.g. only run a fuzzy `match` query on the original text field, but not on the phonetic version). + + +## Double metaphone settings [_double_metaphone_settings] + +If the `double_metaphone` encoder is used, then this additional setting is supported: + +`max_code_len` +: The maximum length of the emitted metaphone token. Defaults to `4`. + + +## Beider Morse settings [_beider_morse_settings] + +If the `beider_morse` encoder is used, then these additional settings are supported: + +`rule_type` +: Whether matching should be `exact` or `approx` (default). + +`name_type` +: Whether names are `ashkenazi`, `sephardic`, or `generic` (default). + +`languageset` +: An array of languages to check. If not specified, then the language will be guessed. Accepts: `any`, `common`, `cyrillic`, `english`, `french`, `german`, `hebrew`, `hungarian`, `polish`, `romanian`, `russian`, `spanish`. 
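+
+As a sketch of how these settings fit together (the index name `phonetic_bm_sample` and the analyzer and filter names are placeholders chosen for this example), a Beider Morse filter might be configured as follows:
+
+```console
+PUT phonetic_bm_sample
+{
+  "settings": {
+    "index": {
+      "analysis": {
+        "analyzer": {
+          "my_bm_analyzer": {
+            "tokenizer": "standard",
+            "filter": [
+              "lowercase",
+              "my_beider_morse"
+            ]
+          }
+        },
+        "filter": {
+          "my_beider_morse": {
+            "type": "phonetic",
+            "encoder": "beider_morse",
+            "rule_type": "approx",
+            "name_type": "generic",
+            "languageset": [ "french", "german" ]
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+The `replace` option is omitted here because, as noted above, it is not supported by the `beider_morse` encoder.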
+ diff --git a/docs/reference/elasticsearch-plugins/analysis-phonetic.md b/docs/reference/elasticsearch-plugins/analysis-phonetic.md new file mode 100644 index 0000000000000..7d4b02c3fa5d2 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-phonetic.md @@ -0,0 +1,39 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-phonetic.html +--- + +# Phonetic analysis plugin [analysis-phonetic] + +The Phonetic Analysis plugin provides token filters which convert tokens to their phonetic representation using Soundex, Metaphone, and a variety of other algorithms. + + +## Installation [analysis-phonetic-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install analysis-phonetic +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-phonetic/analysis-phonetic-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-phonetic/analysis-phonetic-9.0.0-beta1.zip). To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-phonetic/analysis-phonetic-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-phonetic/analysis-phonetic-9.0.0-beta1.zip.asc). + + +## Removal [analysis-phonetic-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove analysis-phonetic +``` + +The node must be stopped before removing the plugin. + + diff --git a/docs/reference/elasticsearch-plugins/analysis-plugins.md b/docs/reference/elasticsearch-plugins/analysis-plugins.md new file mode 100644 index 0000000000000..88a7af74daa78 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-plugins.md @@ -0,0 +1,52 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis.html +--- + +# Analysis plugins [analysis] + +Analysis plugins extend Elasticsearch by adding new analyzers, tokenizers, token filters, or character filters to Elasticsearch. + + +## Core analysis plugins [_core_analysis_plugins] + +The core analysis plugins are: + +[ICU](/reference/elasticsearch-plugins/analysis-icu.md) +: Adds extended Unicode support using the [ICU](http://site.icu-project.org/) libraries, including better analysis of Asian languages, Unicode normalization, Unicode-aware case folding, collation support, and transliteration. + +[Kuromoji](/reference/elasticsearch-plugins/analysis-kuromoji.md) +: Advanced analysis of Japanese using the [Kuromoji analyzer](https://www.atilika.org/). + +[Nori](/reference/elasticsearch-plugins/analysis-nori.md) +: Morphological analysis of Korean using the Lucene Nori analyzer. + +[Phonetic](/reference/elasticsearch-plugins/analysis-phonetic.md) +: Analyzes tokens into their phonetic equivalent using Soundex, Metaphone, Caverphone, and other codecs. + +[SmartCN](/reference/elasticsearch-plugins/analysis-smartcn.md) +: An analyzer for Chinese or mixed Chinese-English text. This analyzer uses probabilistic knowledge to find the optimal word segmentation for Simplified Chinese text. 
The text is first broken into sentences, then each sentence is segmented into words. + +[Stempel](/reference/elasticsearch-plugins/analysis-stempel.md) +: Provides high quality stemming for Polish. + +[Ukrainian](/reference/elasticsearch-plugins/analysis-ukrainian.md) +: Provides stemming for Ukrainian. + + +## Community contributed analysis plugins [_community_contributed_analysis_plugins] + +A number of analysis plugins have been contributed by our community: + +* [IK Analysis Plugin](https://github.com/medcl/elasticsearch-analysis-ik) (by Medcl) +* [Pinyin Analysis Plugin](https://github.com/medcl/elasticsearch-analysis-pinyin) (by Medcl) +* [Vietnamese Analysis Plugin](https://github.com/duydo/elasticsearch-analysis-vietnamese) (by Duy Do) +* [STConvert Analysis Plugin](https://github.com/medcl/elasticsearch-analysis-stconvert) (by Medcl) + + + + + + + + diff --git a/docs/reference/elasticsearch-plugins/analysis-polish-stop.md b/docs/reference/elasticsearch-plugins/analysis-polish-stop.md new file mode 100644 index 0000000000000..db2bac3b38585 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-polish-stop.md @@ -0,0 +1,68 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-polish-stop.html +--- + +# polish_stop token filter [analysis-polish-stop] + +The `polish_stop` token filter filters out Polish stopwords (`_polish_`), and any other custom stopwords specified by the user. This filter only supports the predefined `_polish_` stopwords list. If you want to use a different predefined list, then use the [`stop` token filter](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) instead. + +```console +PUT /polish_stop_example +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "analyzer_with_stop": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "polish_stop" + ] + } + }, + "filter": { + "polish_stop": { + "type": "polish_stop", + "stopwords": [ + "_polish_", + "jeść" + ] + } + } + } + } + } +} + +GET polish_stop_example/_analyze +{ + "analyzer": "analyzer_with_stop", + "text": "Gdzie kucharek sześć, tam nie ma co jeść." +} +``` + +The above request returns: + +```console-result +{ + "tokens" : [ + { + "token" : "kucharek", + "start_offset" : 6, + "end_offset" : 14, + "type" : "", + "position" : 1 + }, + { + "token" : "sześć", + "start_offset" : 15, + "end_offset" : 20, + "type" : "", + "position" : 2 + } + ] +} +``` + diff --git a/docs/reference/elasticsearch-plugins/analysis-smartcn.md b/docs/reference/elasticsearch-plugins/analysis-smartcn.md new file mode 100644 index 0000000000000..cfb51121aff79 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-smartcn.md @@ -0,0 +1,52 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-smartcn.html +--- + +# Smart Chinese analysis plugin [analysis-smartcn] + +The Smart Chinese Analysis plugin integrates Lucene’s Smart Chinese analysis module into elasticsearch. + +It provides an analyzer for Chinese or mixed Chinese-English text. This analyzer uses probabilistic knowledge to find the optimal word segmentation for Simplified Chinese text. The text is first broken into sentences, then each sentence is segmented into words. + + +## Installation [analysis-smartcn-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. 
+:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install analysis-smartcn +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-smartcn/analysis-smartcn-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-smartcn/analysis-smartcn-9.0.0-beta1.zip). To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-smartcn/analysis-smartcn-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-smartcn/analysis-smartcn-9.0.0-beta1.zip.asc). + + +## Removal [analysis-smartcn-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove analysis-smartcn +``` + +The node must be stopped before removing the plugin. + + +## `smartcn` tokenizer and token filter [analysis-smartcn-tokenizer] + +The plugin provides the `smartcn` analyzer, `smartcn_tokenizer` tokenizer, and `smartcn_stop` token filter which are not configurable. + +::::{note} +The `smartcn_word` token filter and `smartcn_sentence` have been deprecated. +:::: + + + + diff --git a/docs/reference/elasticsearch-plugins/analysis-smartcn_stop.md b/docs/reference/elasticsearch-plugins/analysis-smartcn_stop.md new file mode 100644 index 0000000000000..b1bc52570de2b --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-smartcn_stop.md @@ -0,0 +1,377 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-smartcn_stop.html +--- + +# smartcn_stop token filter [analysis-smartcn_stop] + +The `smartcn_stop` token filter filters out stopwords defined by `smartcn` analyzer (`_smartcn_`), and any other custom stopwords specified by the user. This filter only supports the predefined `_smartcn_` stopwords list. If you want to use a different predefined list, then use the [`stop` token filter](/reference/data-analysis/text-analysis/analysis-stop-tokenfilter.md) instead. 
+ +```console +PUT smartcn_example +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "smartcn_with_stop": { + "tokenizer": "smartcn_tokenizer", + "filter": [ + "porter_stem", + "my_smartcn_stop" + ] + } + }, + "filter": { + "my_smartcn_stop": { + "type": "smartcn_stop", + "stopwords": [ + "_smartcn_", + "stack", + "的" + ] + } + } + } + } + } +} + +GET smartcn_example/_analyze +{ + "analyzer": "smartcn_with_stop", + "text": "哈喽,我们是 Elastic 我们是 Elastic Stack(Elasticsearch、Kibana、Beats 和 Logstash)的开发公司。从股票行情到 Twitter 消息流,从 Apache 日志到 WordPress 博文,我们可以帮助人们体验搜索的强大力量,帮助他们以截然不同的方式探索和分析数据" +} +``` + +The above request returns: + +```console-result +{ + "tokens": [ + { + "token": "哈", + "start_offset": 0, + "end_offset": 1, + "type": "word", + "position": 0 + }, + { + "token": "喽", + "start_offset": 1, + "end_offset": 2, + "type": "word", + "position": 1 + }, + { + "token": "我们", + "start_offset": 3, + "end_offset": 5, + "type": "word", + "position": 3 + }, + { + "token": "是", + "start_offset": 5, + "end_offset": 6, + "type": "word", + "position": 4 + }, + { + "token": "elast", + "start_offset": 7, + "end_offset": 14, + "type": "word", + "position": 5 + }, + { + "token": "我们", + "start_offset": 17, + "end_offset": 19, + "type": "word", + "position": 6 + }, + { + "token": "是", + "start_offset": 19, + "end_offset": 20, + "type": "word", + "position": 7 + }, + { + "token": "elast", + "start_offset": 21, + "end_offset": 28, + "type": "word", + "position": 8 + }, + { + "token": "elasticsearch", + "start_offset": 35, + "end_offset": 48, + "type": "word", + "position": 11 + }, + { + "token": "kibana", + "start_offset": 49, + "end_offset": 55, + "type": "word", + "position": 13 + }, + { + "token": "beat", + "start_offset": 56, + "end_offset": 61, + "type": "word", + "position": 15 + }, + { + "token": "和", + "start_offset": 62, + "end_offset": 63, + "type": "word", + "position": 16 + }, + { + "token": "logstash", + "start_offset": 64, + "end_offset": 72, + "type": "word", + "position": 17 + }, + { + "token": "开发", + "start_offset": 74, + "end_offset": 76, + "type": "word", + "position": 20 + }, + { + "token": "公司", + "start_offset": 76, + "end_offset": 78, + "type": "word", + "position": 21 + }, + { + "token": "从", + "start_offset": 79, + "end_offset": 80, + "type": "word", + "position": 23 + }, + { + "token": "股票", + "start_offset": 80, + "end_offset": 82, + "type": "word", + "position": 24 + }, + { + "token": "行情", + "start_offset": 82, + "end_offset": 84, + "type": "word", + "position": 25 + }, + { + "token": "到", + "start_offset": 84, + "end_offset": 85, + "type": "word", + "position": 26 + }, + { + "token": "twitter", + "start_offset": 86, + "end_offset": 93, + "type": "word", + "position": 27 + }, + { + "token": "消息", + "start_offset": 94, + "end_offset": 96, + "type": "word", + "position": 28 + }, + { + "token": "流", + "start_offset": 96, + "end_offset": 97, + "type": "word", + "position": 29 + }, + { + "token": "从", + "start_offset": 98, + "end_offset": 99, + "type": "word", + "position": 31 + }, + { + "token": "apach", + "start_offset": 100, + "end_offset": 106, + "type": "word", + "position": 32 + }, + { + "token": "日志", + "start_offset": 107, + "end_offset": 109, + "type": "word", + "position": 33 + }, + { + "token": "到", + "start_offset": 109, + "end_offset": 110, + "type": "word", + "position": 34 + }, + { + "token": "wordpress", + "start_offset": 111, + "end_offset": 120, + "type": "word", + "position": 35 + }, + { + "token": "博", + "start_offset": 121, + "end_offset": 122, + 
"type": "word", + "position": 36 + }, + { + "token": "文", + "start_offset": 122, + "end_offset": 123, + "type": "word", + "position": 37 + }, + { + "token": "我们", + "start_offset": 124, + "end_offset": 126, + "type": "word", + "position": 39 + }, + { + "token": "可以", + "start_offset": 126, + "end_offset": 128, + "type": "word", + "position": 40 + }, + { + "token": "帮助", + "start_offset": 128, + "end_offset": 130, + "type": "word", + "position": 41 + }, + { + "token": "人们", + "start_offset": 130, + "end_offset": 132, + "type": "word", + "position": 42 + }, + { + "token": "体验", + "start_offset": 132, + "end_offset": 134, + "type": "word", + "position": 43 + }, + { + "token": "搜索", + "start_offset": 134, + "end_offset": 136, + "type": "word", + "position": 44 + }, + { + "token": "强大", + "start_offset": 137, + "end_offset": 139, + "type": "word", + "position": 46 + }, + { + "token": "力量", + "start_offset": 139, + "end_offset": 141, + "type": "word", + "position": 47 + }, + { + "token": "帮助", + "start_offset": 142, + "end_offset": 144, + "type": "word", + "position": 49 + }, + { + "token": "他们", + "start_offset": 144, + "end_offset": 146, + "type": "word", + "position": 50 + }, + { + "token": "以", + "start_offset": 146, + "end_offset": 147, + "type": "word", + "position": 51 + }, + { + "token": "截然不同", + "start_offset": 147, + "end_offset": 151, + "type": "word", + "position": 52 + }, + { + "token": "方式", + "start_offset": 152, + "end_offset": 154, + "type": "word", + "position": 54 + }, + { + "token": "探索", + "start_offset": 154, + "end_offset": 156, + "type": "word", + "position": 55 + }, + { + "token": "和", + "start_offset": 156, + "end_offset": 157, + "type": "word", + "position": 56 + }, + { + "token": "分析", + "start_offset": 157, + "end_offset": 159, + "type": "word", + "position": 57 + }, + { + "token": "数据", + "start_offset": 159, + "end_offset": 161, + "type": "word", + "position": 58 + } + ] +} +``` + diff --git a/docs/reference/elasticsearch-plugins/analysis-stempel.md b/docs/reference/elasticsearch-plugins/analysis-stempel.md new file mode 100644 index 0000000000000..c6b392dc89dea --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-stempel.md @@ -0,0 +1,45 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-stempel.html +--- + +# Stempel Polish analysis plugin [analysis-stempel] + +The Stempel analysis plugin integrates Lucene’s Stempel analysis module for Polish into elasticsearch. + + +## Installation [analysis-stempel-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install analysis-stempel +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-stempel/analysis-stempel-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-stempel/analysis-stempel-9.0.0-beta1.zip). 
To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-stempel/analysis-stempel-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-stempel/analysis-stempel-9.0.0-beta1.zip.asc). + + +## Removal [analysis-stempel-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove analysis-stempel +``` + +The node must be stopped before removing the plugin. + + +## `stempel` tokenizer and token filters [analysis-stempel-tokenizer] + +The plugin provides the `polish` analyzer and the `polish_stem` and `polish_stop` token filters, which are not configurable. + + + diff --git a/docs/reference/elasticsearch-plugins/analysis-ukrainian.md b/docs/reference/elasticsearch-plugins/analysis-ukrainian.md new file mode 100644 index 0000000000000..53717b1c5e02a --- /dev/null +++ b/docs/reference/elasticsearch-plugins/analysis-ukrainian.md @@ -0,0 +1,45 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-ukrainian.html +--- + +# Ukrainian analysis plugin [analysis-ukrainian] + +The Ukrainian analysis plugin integrates Lucene’s UkrainianMorfologikAnalyzer into elasticsearch. + +It provides stemming for Ukrainian using the [Morfologik project](https://github.com/morfologik/morfologik-stemming). + + +## Installation [analysis-ukrainian-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install analysis-ukrainian +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-ukrainian/analysis-ukrainian-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-ukrainian/analysis-ukrainian-9.0.0-beta1.zip). To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-ukrainian/analysis-ukrainian-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-ukrainian/analysis-ukrainian-9.0.0-beta1.zip.asc). + + +## Removal [analysis-ukrainian-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove analysis-ukrainian +``` + +The node must be stopped before removing the plugin. + + +## `ukrainian` analyzer [analysis-ukrainian-analyzer] + +The plugin provides the `ukrainian` analyzer. + diff --git a/docs/reference/elasticsearch-plugins/api-extension-plugins.md b/docs/reference/elasticsearch-plugins/api-extension-plugins.md new file mode 100644 index 0000000000000..0011f306ddf61 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/api-extension-plugins.md @@ -0,0 +1,19 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/api.html +--- + +# API extension plugins [api] + +API extension plugins add new functionality to Elasticsearch by adding new APIs or features, usually to do with search or mapping. 
+ + +## Community contributed API extension plugins [_community_contributed_api_extension_plugins] + +A number of plugins have been contributed by our community: + +* [carrot2 Plugin](https://github.com/carrot2/elasticsearch-carrot2): Results clustering with [carrot2](https://github.com/carrot2/carrot2) (by Dawid Weiss) +* [Elasticsearch Trigram Accelerated Regular Expression Filter](https://github.com/wikimedia/search-extra): (by Wikimedia Foundation/Nik Everett) +* [Elasticsearch Experimental Highlighter](https://github.com/wikimedia/search-highlighter): (by Wikimedia Foundation/Nik Everett) +* [Entity Resolution Plugin](https://github.com/zentity-io/zentity) ([zentity](https://zentity.io)): Real-time entity resolution with pure Elasticsearch (by Dave Moore) + diff --git a/docs/reference/elasticsearch-plugins/cloud-aws-best-practices.md b/docs/reference/elasticsearch-plugins/cloud-aws-best-practices.md new file mode 100644 index 0000000000000..be3ffb12821ce --- /dev/null +++ b/docs/reference/elasticsearch-plugins/cloud-aws-best-practices.md @@ -0,0 +1,35 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/cloud-aws-best-practices.html +--- + +# Best Practices in AWS [cloud-aws-best-practices] + +This section contains some other information about designing and managing an {{es}} cluster on your own AWS infrastructure. If you would prefer to avoid these operational details then you may be interested in a hosted {{es}} installation available on AWS-based infrastructure from [https://www.elastic.co/cloud](https://www.elastic.co/cloud). + +## Storage [_storage] + +EC2 instances offer a number of different kinds of storage. Please be aware of the following when selecting the storage for your cluster: + +* [Instance Store](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.md) is recommended for {{es}} clusters as it offers excellent performance and is cheaper than EBS-based storage. {{es}} is designed to work well with this kind of ephemeral storage because it replicates each shard across multiple nodes. If a node fails and its Instance Store is lost then {{es}} will rebuild any lost shards from other copies. +* [EBS-based storage](https://aws.amazon.com/ebs/) may be acceptable for smaller clusters (1-2 nodes). Be sure to use provisioned IOPS to ensure your cluster has satisfactory performance. +* [EFS-based storage](https://aws.amazon.com/efs/) is not recommended or supported as it does not offer satisfactory performance. Historically, shared network filesystems such as EFS have not always offered precisely the behaviour that {{es}} requires of its filesystem, and this has been known to lead to index corruption. Although EFS offers durability, shared storage, and the ability to grow and shrink filesystems dynamically, you can achieve the same benefits using {{es}} directly. + + +## Choice of AMI [_choice_of_ami] + +Prefer the [Amazon Linux 2 AMIs](https://aws.amazon.com/amazon-linux-2/) as these allow you to benefit from the lightweight nature, support, and EC2-specific performance enhancements that these images offer. + + +## Networking [_networking] + +* Smaller instance types have limited network performance, in terms of both [bandwidth and number of connections](https://lab.getbase.com/how-we-discovered-limitations-on-the-aws-tcp-stack/). If networking is a bottleneck, avoid [instance types](https://aws.amazon.com/ec2/instance-types/) with networking labelled as `Moderate` or `Low`. 
+* It is a good idea to distribute your nodes across multiple [availability zones](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.md) and use [shard allocation awareness](docs-content://deploy-manage/distributed-architecture/shard-allocation-relocation-recovery/shard-allocation-awareness.md) to ensure that each shard has copies in more than one availability zone. +* Do not span a cluster across regions. {{es}} expects that node-to-node connections within a cluster are reasonably reliable and offer high bandwidth and low latency, and these properties do not hold for connections between regions. Although an {{es}} cluster will behave correctly when node-to-node connections are unreliable or slow, it is not optimised for this case and its performance may suffer. If you wish to geographically distribute your data, you should provision multiple clusters and use features such as [cross-cluster search](docs-content://solutions/search/cross-cluster-search.md) and [cross-cluster replication](docs-content://deploy-manage/tools/cross-cluster-replication.md). + + +## Other recommendations [_other_recommendations] + +* If you have split your nodes into roles, consider [tagging the EC2 instances](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.md) by role to make it easier to filter and view your EC2 instances in the AWS console. +* Consider [enabling termination protection](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/terminating-instances.md#Using_ChangingDisableAPITermination) for all of your data and master-eligible nodes. This will help to prevent accidental termination of these nodes which could temporarily reduce the resilience of the cluster and which could cause a potentially disruptive reallocation of shards. +* If running your cluster using one or more [auto-scaling groups](https://docs.aws.amazon.com/autoscaling/ec2/userguide/AutoScalingGroup.md), consider protecting your data and master-eligible nodes [against termination during scale-in](https://docs.aws.amazon.com/autoscaling/ec2/userguide/as-instance-termination.md#instance-protection-instance). This will help to prevent automatic termination of these nodes which could temporarily reduce the resilience of the cluster and which could cause a potentially disruptive reallocation of shards. If these instances are protected against termination during scale-in then you can use shard allocation filtering to gracefully migrate any data off these nodes before terminating them manually. Refer to [](/reference/elasticsearch/index-settings/shard-allocation.md). diff --git a/docs/reference/elasticsearch-plugins/cloud-enterprise/ece-add-plugins.md b/docs/reference/elasticsearch-plugins/cloud-enterprise/ece-add-plugins.md new file mode 100644 index 0000000000000..5b167103be09d --- /dev/null +++ b/docs/reference/elasticsearch-plugins/cloud-enterprise/ece-add-plugins.md @@ -0,0 +1,41 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/cloud-enterprise/current/ece-add-plugins.html +--- + +# Plugin management (Cloud Enterprise) [ece-add-plugins] + +Plugins extend the core functionality of Elasticsearch. Elastic Cloud Enterprise makes it easy to add plugins to your deployment by providing a number of plugins that work with your version of Elasticsearch. One advantage of these plugins is that you generally don’t have to worry about upgrading plugins when upgrading to a new Elasticsearch version, unless there are breaking changes. 
The plugins simply are upgraded along with the rest of your deployment. + +Adding plugins to a deployment is as simple as selecting it from the list of available plugins, but different versions of Elasticsearch support different plugins. Plugins are available for different purposes, such as: + +* National language support, phonetic analysis, and extended unicode support +* Ingesting attachments in common formats and ingesting information about the geographic location of IP addresses +* Adding new field datatypes to Elasticsearch + +Additional plugins might be available. If a plugin is listed for your version of Elasticsearch, it can be used. + +To add plugins when creating a new deployment: + +1. [Log into the Cloud UI](docs-content://deploy-manage/deploy/cloud-enterprise/log-into-cloud-ui.md) and select **Create deployment**. +2. Make your initial deployment selections, then select **Customize Deployment**. +3. Beneath the Elasticsearch master node, expand the **Manage plugins and settings** caret. +4. Select the plugins you want. +5. Select **Create deployment**. + +The deployment spins up with the plugins installed. + +To add plugins to an existing deployment: + +1. [Log into the Cloud UI](docs-content://deploy-manage/deploy/cloud-enterprise/log-into-cloud-ui.md). +2. On the **Deployments** page, select your deployment. + + Narrow the list by name, ID, or choose from several other filters. To further define the list, use a combination of filters. + +3. From your deployment menu, go to the **Edit** page. +4. Beneath the Elasticsearch master node, expand the **Manage plugins and settings** caret. +5. Select the plugins that you want. +6. Select **Save changes**. + +There is no downtime when adding plugins to highly available deployments. The deployment is updated with new nodes that have the plugins installed. + diff --git a/docs/reference/elasticsearch-plugins/cloud/ec-adding-elastic-plugins.md b/docs/reference/elasticsearch-plugins/cloud/ec-adding-elastic-plugins.md new file mode 100644 index 0000000000000..66a9348ac2f2b --- /dev/null +++ b/docs/reference/elasticsearch-plugins/cloud/ec-adding-elastic-plugins.md @@ -0,0 +1,30 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/cloud/current/ec-adding-elastic-plugins.html +--- + +# Add plugins provided with Elasticsearch Service [ec-adding-elastic-plugins] + +You can use a variety of official plugins that are compatible with your version of {{es}}. When you upgrade to a new {{es}} version, these plugins are simply upgraded with the rest of your deployment. + +## Before you begin [ec_before_you_begin_6] + +Some restrictions apply when adding plugins. To learn more, check [Restrictions for {{es}} and {{kib}} plugins](cloud://docs/release-notes/cloud-hosted/known-issues.md#ec-restrictions-plugins). + +Only Gold, Platinum, Enterprise and Private subscriptions, running version 2.4.6 or later, have access to uploading custom plugins. All subscription levels, including Standard, can upload scripts and dictionaries. + +To enable a plugin for a deployment: + +1. Log in to the [Elasticsearch Service Console](https://cloud.elastic.co?page=docs&placement=docs-body). +2. Find your deployment on the home page in the Elasticsearch Service card and select **Manage** to access it directly. Or, select **Hosted deployments** to go to the deployments page to view all of your deployments. + + On the deployments page you can narrow your deployments by name, ID, or choose from several other filters. 
To customize your view, use a combination of filters, or change the format from a grid to a list. + +3. From the **Actions** dropdown, select **Edit deployment**. +4. Select **Manage user settings and extensions**. +5. Select the **Extensions** tab. +6. Select the plugins that you want to enable. +7. Select **Back**. +8. Select **Save**. The {{es}} cluster is then updated with new nodes that have the plugin installed. + + diff --git a/docs/reference/elasticsearch-plugins/cloud/ec-adding-plugins.md b/docs/reference/elasticsearch-plugins/cloud/ec-adding-plugins.md new file mode 100644 index 0000000000000..3e96631c389da --- /dev/null +++ b/docs/reference/elasticsearch-plugins/cloud/ec-adding-plugins.md @@ -0,0 +1,31 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/cloud/current/ec-adding-plugins.html +--- + +# Add plugins and extensions [ec-adding-plugins] + +Plugins extend the core functionality of {{es}}. There are many suitable plugins, including: + +* Discovery plugins, such as the cloud AWS plugin that allows discovering nodes on EC2 instances. +* Analysis plugins, to provide analyzers targeted at languages other than English. +* Scripting plugins, to provide additional scripting languages. + +Plugins can come from different sources: the official ones created or at least maintained by Elastic, community-sourced plugins from other users, and plugins that you provide. Some of the official plugins are always provided with our service, and can be [enabled per deployment](/reference/elasticsearch-plugins/cloud/ec-adding-elastic-plugins.md\). + +There are two ways to add plugins to a deployment in Elasticsearch Service: + +* [Enable one of the official plugins already available in Elasticsearch Service](/reference/elasticsearch-plugins/cloud/ec-adding-elastic-plugins.md\). +* [Upload a custom plugin and then enable it per deployment](/reference/elasticsearch-plugins/cloud/ec-custom-bundles.md\). + +Custom plugins can include the official {{es}} plugins not provided with Elasticsearch Service, any of the community-sourced plugins, or [plugins that you write yourself](/extend/index.md). Uploading custom plugins is available only to Gold, Platinum, and Enterprise subscriptions. For more information, check [Upload custom plugins and bundles](/reference/elasticsearch-plugins/cloud/ec-custom-bundles.md\). + +To learn more about the official and community-sourced plugins, refer to [{{es}} Plugins and Integrations](/reference/elasticsearch-plugins/index.md). + +For a detailed guide with examples of using the Elasticsearch Service API to create, get information about, update, and delete extensions and plugins, check [Managing plugins and extensions through the API](/reference/elasticsearch-plugins/cloud/ec-plugins-guide.md\). + +Plugins are not supported for {{kib}}. To learn more, check [Restrictions for {{es}} and {{kib}} plugins](cloud://docs/release-notes/cloud-hosted/known-issues.md#ec-restrictions-plugins). 
+ + + + diff --git a/docs/reference/elasticsearch-plugins/cloud/ec-custom-bundles.md b/docs/reference/elasticsearch-plugins/cloud/ec-custom-bundles.md new file mode 100644 index 0000000000000..581be5d35dbcb --- /dev/null +++ b/docs/reference/elasticsearch-plugins/cloud/ec-custom-bundles.md @@ -0,0 +1,250 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/cloud/current/ec-custom-bundles.html +--- + +# Upload custom plugins and bundles [ec-custom-bundles] + +There are several cases where you might need your own files to be made available to your {{es}} cluster’s nodes: + +* Your own custom plugins, or third-party plugins that are not amongst the [officially available plugins](/reference/elasticsearch-plugins/plugin-management.md). +* Custom dictionaries, such as synonyms, stop words, compound words, and so on. +* Cluster configuration files, such as an Identity Provider metadata file used when you [secure your clusters with SAML](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/saml.md). + +To facilitate this, we make it possible to upload a ZIP file that contains the files you want to make available. Uploaded files are stored using Amazon’s highly-available S3 service. This is necessary so we do not have to rely on the availability of third-party services, such as the official plugin repository, when provisioning nodes. + +Custom plugins and bundles are collectively referred to as extensions. + +## Before you begin [ec_before_you_begin_7] + +The selected plugins/bundles are downloaded and provided when a node starts. Changing a plugin does not change it for nodes already running it. Refer to [Updating Plugins and Bundles](#ec-update-bundles-and-plugins). + +With great power comes great responsibility: your plugins can extend your deployment with new functionality, but also break it. Be careful. We obviously cannot guarantee that your custom code works. + +::::{important} +You cannot edit or delete a custom extension after it has been used in a deployment. To remove it from your deployment, you can disable the extension and update your deployment configuration. +:::: + + +Uploaded files cannot be bigger than 20MB for most subscription levels, for Platinum and Enterprise the limit is 8GB. + +It is important that plugins and dictionaries that you reference in mappings and configurations are available at all times. For example, if you try to upgrade {{es}} and de-select a dictionary that is referenced in your mapping, the new nodes will be unable to recover the cluster state and function. This is true even if the dictionary is referenced by an empty index you do not actually use. + + +## Prepare your files for upload [ec-prepare-custom-bundles] + +Plugins are uploaded as ZIP files. You need to choose whether your uploaded file should be treated as a *plugin* or as a *bundle*. Bundles are not installed as plugins. If you need to upload both a custom plugin and custom dictionaries, upload them separately. + +To prepare your files, create one of the following: + +Plugins +: A plugin is a ZIP file that contains a plugin descriptor file and binaries. + + The plugin descriptor file is called either `stable-plugin-descriptor.properties` for plugins built against the stable plugin API, or `plugin-descriptor.properties` for plugins built against the classic plugin API. A plugin ZIP file should only contain one plugin descriptor file. + + {{es}} assumes that the uploaded ZIP file contains binaries. 
If it finds any source code, it fails with an error message, causing provisioning to fail. Make sure you upload binaries, and not source code. + + ::::{note} + Plugins larger than 5GB should have the plugin descriptor file at the top of the archive. This order can be achieved by specifying at time of creating the ZIP file: + + ```sh + zip -r name-of-plugin.zip name-of-descriptor-file.properties * + ``` + + :::: + + +Bundles +: The entire content of a bundle is made available to the node by extracting to the {{es}} container’s `/app/config` directory. This is useful to make custom dictionaries available. Dictionaries should be placed in a `/dictionaries` folder in the root path of your ZIP file. + + Here are some examples of bundles: + + **Script** + + ```text + $ tree . + . + └── scripts + └── test.js + ``` + + The script `test.js` can be referred in queries as `"script": "test"`. + + **Dictionary of synonyms** + + ```text + $ tree . + . + └── dictionaries + └── synonyms.txt + ``` + + The dictionary `synonyms.txt` can be used as `synonyms.txt` or using the full path `/app/config/synonyms.txt` in the `synonyms_path` of the `synonym-filter`. + + To learn more about analyzing with synonyms, check [Synonym token filter](/reference/data-analysis/text-analysis/analysis-synonym-tokenfilter.md) and [Formatting Synonyms](https://www.elastic.co/guide/en/elasticsearch/guide/2.x/synonym-formats.html). + + **GeoIP database bundle** + + ```text + $ tree . + . + └── ingest-geoip + └── MyGeoLite2-City.mmdb + ``` + + Note that the extension must be `-(City|Country|ASN).mmdb`, and it must be a different name than the original file name `GeoLite2-City.mmdb` which already exists in Elasticsearch Service. To use this bundle, you can refer it in the GeoIP ingest pipeline as `MyGeoLite2-City.mmdb` under `database_file`. + + + +## Add your extension [ec-add-your-plugin] + +You must upload your files before you can apply them to your cluster configuration: + +1. Log in to the [Elasticsearch Service Console](https://cloud.elastic.co?page=docs&placement=docs-body). +2. Find your deployment on the home page in the Elasticsearch Service card and select **Manage** to access it directly. Or, select **Hosted deployments** to go to the deployments page to view all of your deployments. +3. Under **Features**, select **Extensions**. +4. Select **Upload extension**. +5. Complete the extension fields, including the {{es}} version. + + * Plugins must use full version notation down to the patch level, such as `7.10.1`. You cannot use wildcards. This version notation should match the version in your plugin’s plugin descriptor file. For classic plugins, it should also match the target deployment version. + * Bundles should specify major or minor versions with wildcards, such as `7.*` or `*`. Wildcards are recommended to ensure the bundle is compatible across all versions of these releases. + +6. Select the extension **Type**. +7. Under **Plugin file**, choose the file to upload. +8. Select **Create extension**. + +After creating your extension, you can [enable them for existing {{es}} deployments](#ec-update-bundles) or enable them when creating new deployments. + +::::{note} +Creating extensions larger than 200MB should be done through the extensions API. + +Refer to [Managing plugins and extensions through the API](/reference/elasticsearch-plugins/cloud/ec-plugins-guide.md) for more details. 
+ +:::: + + + +## Update your deployment configuration [ec-update-bundles] + +After uploading your files, you can enable them when creating a new {{es}} deployment. For existing deployments, you must update your deployment configuration to use the new files: + +1. Log in to the [Elasticsearch Service Console](https://cloud.elastic.co?page=docs&placement=docs-body). +2. Find your deployment on the home page in the Elasticsearch Service card and select **Manage** to access it directly. Or, select **Hosted deployments** to go to the deployments page to view all of your deployments. + + On the deployments page you can narrow your deployments by name, ID, or choose from several other filters. To customize your view, use a combination of filters, or change the format from a grid to a list. + +3. From the **Actions** dropdown, select **Edit deployment**. +4. Select **Manage user settings and extensions**. +5. Select the **Extensions** tab. +6. Select the custom extension. +7. Select **Back**. +8. Select **Save**. The {{es}} cluster is then updated with new nodes that have the plugin installed. + + +## Update your extension [ec-update-bundles-and-plugins] + +You can update the ZIP file for any plugin or bundle, but the updated file is only downloaded and made available when a node starts. + +Be careful when updating an extension that is already in use. If you replace the file of an in-use extension and the new file is broken, every node that is restarted or moved picks up the broken file, which can make even highly available clusters unavailable. + +If the extension is not in use by any deployments, then you are free to update the files or extension details as much as you like. However, if the extension is in use and you need to update it with a new file, it is recommended to [create a new extension](#ec-add-your-plugin) rather than updating the existing one that is in use. + +By following this method, at most one node is down even if the new extension file is faulty, so highly available clusters remain available. + +This approach also lets you test the extension changes on a test or staging deployment before applying them to a production deployment. + +You may delete the old extension after the deployment has been updated successfully. + +To update an extension with a new file version: + +1. Prepare a new plugin or bundle. +2. On the **Extensions** page, [upload a new extension](#ec-add-your-plugin). +3. Make your new files available by uploading them. +4. Find your deployment on the home page in the Elasticsearch Service card and select **Manage** to access it directly. Or, select **Hosted deployments** to go to the deployments page to view all of your deployments. + + On the deployments page you can narrow your deployments by name, ID, or choose from several other filters. To customize your view, use a combination of filters, or change the format from a grid to a list. + +5. From the **Actions** dropdown, select **Edit deployment**. +6. Select **Manage user settings and extensions**. +7. Select the **Extensions** tab. +8. Select the new extension and de-select the old one. +9. Select **Back**. +10. Select **Save**. + + +## How to use the extensions API [ec-extension-api-usage-guide] + +::::{note} +For a full set of examples, check [Managing plugins and extensions through the API](/reference/elasticsearch-plugins/cloud/ec-plugins-guide.md).
+:::: + + +If you don’t already have one, create an [API key](docs-content://deploy-manage/api-keys/elastic-cloud-api-keys.md) + +There are ways that you can use the extensions API to upload a file. + +### Method 1: Use HTTP `POST` to create metadata and then upload the file using HTTP `PUT` [ec_method_1_use_http_post_to_create_metadata_and_then_upload_the_file_using_http_put] + +Step 1: Create metadata + +```text +curl -XPOST \ +-H "Authorization: ApiKey $EC_API_KEY" \ +-H 'content-type:application/json' \ +https://api.elastic-cloud.com/api/v1/deployments/extensions \ +-d'{ + "name" : "synonyms-v1", + "description" : "The best synonyms ever", + "extension_type" : "bundle", + "version" : "7.*" +}' +``` + +Step 2: Upload the file + +```text +curl -XPUT \ +-H "Authorization: ApiKey $EC_API_KEY" \ +"https://api.elastic-cloud.com/api/v1/deployments/extensions/$extension_id" \ +-T /tmp/synonyms.zip +``` + +If you are using a client that does not have native `application/zip` handling like `curl`, be sure to use the equivalent of the following with `content-type: multipart/form-data`: + +```text +curl -XPUT \ +-H 'Expect:' \ +-H 'content-type: multipart/form-data' \ +-H "Authorization: ApiKey $EC_API_KEY" \ +"https://api.elastic-cloud.com/api/v1/deployments/extensions/$extension_id" -F "file=@/tmp/synonyms.zip" +``` + +For example, using the Python `requests` module, the `PUT` request would be as follows: + +```text +import requests +files = {'file': open('/tmp/synonyms.zip','rb')} +r = requests.put('https://api.elastic-cloud.com/api/v1/deployments/extensions/{}'.format(extension_id), files=files, headers= {'Authorization': 'ApiKey {}'.format(EC_API_KEY)}) +``` + + +### Method 2: Single step. Use a `download_url` so that the API server downloads the object at the specified URL [ec_method_2_single_step_use_a_download_url_so_that_the_api_server_downloads_the_object_at_the_specified_url] + +```text +curl -XPOST \ +-H "Authorization: ApiKey $EC_API_KEY" \ +-H 'content-type:application/json' \ +https://api.elastic-cloud.com/api/v1/deployments/extensions \ +-d'{ + "name" : "anylysis_icu", + "description" : "Helpful description", + "extension_type" : "plugin", + "version" : "7.13.2", + "download_url": "https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-icu/analysis-icu-7.13.2.zip" +}' +``` + +Please refer to the [Extensions API reference](https://www.elastic.co/docs/api/doc/cloud/group/endpoint-extensions) for the complete set of HTTP methods and payloads. + + + diff --git a/docs/reference/elasticsearch-plugins/cloud/ec-plugins-guide.md b/docs/reference/elasticsearch-plugins/cloud/ec-plugins-guide.md new file mode 100644 index 0000000000000..9f2dbaae2ac8f --- /dev/null +++ b/docs/reference/elasticsearch-plugins/cloud/ec-plugins-guide.md @@ -0,0 +1,498 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/cloud/current/ec-plugins-guide.html +--- + +# Managing plugins and extensions through the API [ec-plugins-guide] + +This guide provides a full list of tasks for managing [plugins and extensions](ec-adding-plugins.md) in Elasticsearch Service, using the API. 
+ +* [Create an extension](ec-plugins-guide.md#ec-extension-guide-create) +* [Add an extension to a deployment plan](ec-plugins-guide.md#ec-extension-guide-add-plan) +* [Get an extension](ec-plugins-guide.md#ec-extension-guide-get-extension) +* [Update the name of an existing extension](ec-plugins-guide.md#ec-extension-guide-update-name) +* [Update the type of an existing extension](ec-plugins-guide.md#ec-extension-guide-update-type) +* [Update the version of an existing bundle](ec-plugins-guide.md#ec-extension-guide-update-version-bundle) +* [Update the version of an existing plugin](ec-plugins-guide.md#ec-extension-guide-update-version-plugin) +* [Update the file associated to an existing extension](ec-plugins-guide.md#ec-extension-guide-update-file) +* [Upgrade Elasticsearch](ec-plugins-guide.md#ec-extension-guide-upgrade-elasticsearch) +* [Delete an extension](ec-plugins-guide.md#ec-extension-guide-delete) + + +## Create an extension [ec-extension-guide-create] + +There are two methods to create an extension. You can: + +1. Stream the file from a publicly-accessible download URL. +2. Upload the file from a local file path. + +::::{note} +For plugins larger than 200MB the download URL option **must** be used. Plugins larger than 8GB cannot be uploaded with either method. +:::: + + +These two examples are for the `plugin` extension type. For bundles, change `extension_type` to `bundle`. + +For plugins, `version` must match (exactly) the `elasticsearch.version` field defined in the plugin’s `plugin-descriptor.properties` file. Check [Help for plugin authors](/extend/index.md) for details. For plugins larger than 5GB, the `plugin-descriptor.properties` file needs to be at the top of the archive. This ensures that the our verification process is able to detect that it is an Elasticsearch plugin; otherwise the plugin will be rejected by the API. This order can be achieved by specifying at time of creating the ZIP file: `zip -r name-of-plugin.zip plugin-descriptor.properties *`. + +For bundles, we recommend setting `version` using wildcard notation that matches the major version of the Elasticsearch deployment. For example, if Elasticsearch is on version 8.4.3, simply set `8.*` as the version. The value `8.*` means that the bundle is compatible with all 8.x versions of Elasticsearch. + +$$$ec-extension-guide-create-option1$$$ +**Option 1: Stream the file from a publicly-accessible download URL** + +```sh +curl -X POST \ + https://api.elastic-cloud.com/api/v1/deployments/extensions \ + -H "Authorization: ApiKey $CLOUD_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "download_url" : "https://my_site/custom-plugin-8.4.3.zip", + "extension_type" : "plugin", + "name" : "custom-plugin", + "version" : "8.4.3" +}' +``` + +The single POST request creates an extension with the metadata, validates, and streams the file from the `download_url` specified. The accepted protocols for `download_url` are `http` and `https`. + +::::{note} +The `download_url` must be directly and publicly accessible. There is currently no support for redirection or authentication unless it contains security credentials/tokens expected by your HTTP service as part of the URL. Otherwise, use the following Option 2 to upload the file from a local path. +:::: + + +::::{note} +When the file is larger than 5GB, the request may timeout after 2-5 minutes, but streaming will continue on the server. Check the Extensions page in the Cloud UI after 5-10 minutes to make sure that the plugin has been created. 
A successfully created plugin will contain correct name, type, version, size, and last modified information. +:::: + + +$$$ec-extension-guide-create-option2$$$ +**Option 2: Upload the file from a local file path** + +This option requires a two step process. First, create the metadata for the extension: + +```sh +curl -X POST \ + https://api.elastic-cloud.com/api/v1/deployments/extensions \ + -H "Authorization: ApiKey $CLOUD_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "extension_type": "plugin", + "name": "custom-plugin", + "version" : "8.4.3" +}' +``` + +```sh +{ + "url": "repo://4226448541", + "version": "8.4.3", + "extension_type": "plugin", + "id": "4226448541", + "name": "custom-plugin" +} +``` + +The response returns a `url` you can reference later in the plan (the numeric value in the `url` is the `EXTENSION_ID`). Use this `EXTENSION_ID` in the following PUT call: + +```sh +curl -v -X PUT "https://api.elastic-cloud.com/api/v1/deployments/extensions/EXTENSION_ID" \ +-H 'Content-type:application/zip' \ +-H "Authorization: ApiKey $CLOUD_API_KEY" \ +-H 'Expect:' \ +-T "/path_to/custom-plugin-8.4.3.zip" +``` + +::::{note} +When using curl, always use the `-T` option. DO NOT use `-F` (we have seen inconsistency in curl behavior across systems; using `-F` can result in partially uploaded or truncated files). +:::: + + +The above PUT request uploads the file from the local path specified. This request is synchronous. An HTTP 200 response indicates that the file has been successfully uploaded and is ready for use. + +```sh +{ + "url": "repo://2286113333", + "version": "8.4.3", + "extension_type": "plugin", + "id": "2286113333", + "name": "custom-plugin" +} +``` + + +## Add an extension to a deployment plan [ec-extension-guide-add-plan] + +Once the extension is created and uploaded, you can add the extension using its `EXTENSION_ID` in an [update deployment API call](https://www.elastic.co/docs/api/doc/cloud/operation/operation-update-deployment). + +The following are examples of a GCP plan. Your specific deployment plan will be different. The important parts related to extensions are in the `user_plugins` object. + +```sh +{ + "name": "Extensions", + "prune_orphans": false, + "resources": { + "elasticsearch": [ + { + "region": "gcp-us-central1", + "ref_id": "main-elasticsearch", + "plan": { + "cluster_topology": [ + + ... + + ], + "elasticsearch": { + "version": "8.4.3", + "enabled_built_in_plugins": [ ], + "user_bundles": [ + { + "name": "custom-plugin", + "url": "repo://2286113333", + "elasticsearch_version": "8.4.3" + } + ] + }, + "deployment_template": { + "id": "gcp-storage-optimized-v3" + } + } + } + ] + } +} +``` + +You can use the [cat plugins API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cat-plugins) to confirm that the plugin has been deployed successfully to Elasticsearch. + +The previous examples are for plugins. For bundles, use the `user_bundles` construct instead. + +```sh + "user_bundles": [ + { + "elasticsearch_version": "8.*", + "name": "custom-bundle", + "url": "repo://5886113212" + } + ] +``` + + +## Get an extension [ec-extension-guide-get-extension] + +You can use the GET call to retrieve information about an extension. 
+ +To list all extensions for the account: + +```sh +curl -X GET \ + https://api.elastic-cloud.com/api/v1/deployments/extensions \ + -H 'Content-Type: application/json' \ + -H "Authorization: ApiKey $CLOUD_API_KEY" +``` + +To get a specific extension: + +```sh +curl -X GET \ + https://api.elastic-cloud.com/api/v1/deployments/extensions/EXTENSION_ID \ + -H 'Content-Type: application/json' \ + -H "Authorization: ApiKey $CLOUD_API_KEY" +``` + +The previous GET calls support an optional `include_deployments` parameter. When set to `true`, the call also returns the deployments that currently have the extension in use: + +```sh +curl -X GET \ + https://api.elastic-cloud.com/api/v1/deployments/extensions/EXTENSION_ID?include_deployments=true \ + -H 'Content-Type: application/json' \ + -H "Authorization: ApiKey $CLOUD_API_KEY" +``` + +For example, the previous call returns: + +```sh +{ + "name": "custom-plugin", + "url": "repo://2286113333", + "extension_type": "plugin", + "deployments": [ + "f91f3a9360a74e9d8c068cd2698c92ea" + ], + "version": "8.4.3", + "id": "2286113333" +} +``` + + +## Update the name of an existing extension [ec-extension-guide-update-name] + +To update the name of an existing extension, simply update the name field without uploading a new file. You do not have to specify the `download_url` when only making metadata changes to an extension. + +Example using the [Option 1](ec-plugins-guide.md#ec-extension-guide-create-option1) create an extension method: + +```sh +curl -X POST \ + https://api.elastic-cloud.com/api/v1/deployments/extensions/EXTENSION_ID \ + -H "Authorization: ApiKey $CLOUD_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "extension_type" : "plugin", + "name": "custom-plugin-07012020", + "version" : "8.4.3" +}' +``` + +Example using the [Option 2](ec-plugins-guide.md#ec-extension-guide-create-option2) create an extension method: + +```sh +curl -X POST \ + https://api.elastic-cloud.com/api/v1/deployments/extensions/EXTENSION_ID \ + -H "Authorization: ApiKey $CLOUD_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "extension_type" : "plugin", + "name": "custom-plugin-07012020", + "version" : "8.4.3" +}' +``` + +Updating the name of an existing extension does not change its `EXTENSION_ID`. + + +## Update the type of an existing extension [ec-extension-guide-update-type] + +Updating `extension_type` has no effect. You cannot change the extension’s type (`plugin` versus `bundle`) after the initial creation of an extension. + + +## Update the version of an existing bundle [ec-extension-guide-update-version-bundle] + +For bundles, we recommend setting `version` using wildcard notation that matches the major version of the Elasticsearch deployment. For example, if Elasticsearch is on version 8.4.3, simply set `8.*` as the version. The value `8.*` means that the bundle is compatible with all 8.x versions of Elasticsearch. + +For example, if the bundle was previously uploaded with the version `8.4.2`, simply update the version field. You no longer have to specify the `download_url` when only making metadata changes to a bundle.
+ +Example using the [Option 1](ec-plugins-guide.md#ec-extension-guide-create-option1) create an extension method: + +```sh +curl -X POST \ + https://api.elastic-cloud.com/api/v1/deployments/extensions/EXTENSION_ID \ + -H "Authorization: ApiKey $CLOUD_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "extension_type" : "bundle", + "name": "custom-bundle", + "version" : "8.*" +}' +``` + +Example using the [Option 2](ec-plugins-guide.md#ec-extension-guide-create-option2) create an extension method: + +```sh +curl -X POST \ + https://api.elastic-cloud.com/api/v1/deployments/extensions/EXTENSION_ID \ + -H "Authorization: ApiKey $CLOUD_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "extension_type" : "bundle", + "name": "custom-bundle", + "version" : "8.*" +}' +``` + +Updating the name of an existing extension does not change its `EXTENSION_ID`. + + +## Update the version of an existing plugin [ec-extension-guide-update-version-plugin] + +For plugins, `version` must match (exactly) the `elasticsearch.version` field defined in the plugin’s `plugin-descriptor.properties` file. Check [Help for plugin authors](/extend/index.md) for details. If you change the version, the associated plugin file *must* also be updated accordingly. + + +## Update the file associated to an existing extension [ec-extension-guide-update-file] + +You may want to update an uploaded file for an existing extension without performing an Elasticsearch upgrade. If you are updating the extension to prepare for an Elasticsearch upgrade, check the [Upgrade Elasticsearch](ec-plugins-guide.md#ec-extension-guide-upgrade-elasticsearch) scenario later on this page. + +This example is for the `plugin` extension type. For bundles, change `extension_type` to `bundle`. + +If you used [Option 1](ec-plugins-guide.md#ec-extension-guide-create-option1) to create the extension, simply re-run the POST request with the `download_url` pointing to the location of your updated extension file. + +```sh +curl -X POST \ + https://api.elastic-cloud.com/api/v1/deployments/extensions/EXTENSION_ID \ + -H "Authorization: ApiKey $CLOUD_API_KEY" \ + -H 'Content-Type: application/json' \ + -d '{ + "download_url" : "https://my_site/custom-plugin-8.4.3-10212022.zip", + "extension_type" : "plugin", + "name": "custom-plugin-10212022", + "version" : "8.4.3" +}' +``` + +If you used [Option 2](ec-plugins-guide.md#ec-extension-guide-create-option2) to create the extension, simply re-run the PUT request with the `file` parameter pointing to the location of your updated extension file. + +```sh +curl -v -X PUT "https://api.elastic-cloud.com/api/v1/deployments/extensions/EXTENSION_ID" \ +-H 'Content-type:application/zip' \ +-H "Authorization: ApiKey $CLOUD_API_KEY" \ +-H 'Expect:' \ +-T "/path_to/custom-plugin-8.4.3-10212022.zip" +``` + +::::{important} +If you are not making any other plan changes and simply updating an extension file, you need to issue a no-op plan so that Elasticsearch will make use of this new file. A *no-op* (no operation) plan triggers a rolling restart on the deployment, applying the same (unchanged) plan as the current plan. +:::: + + +Updating the file of an existing extension or bundle does not change its `EXTENSION_ID`. + + +## Upgrade Elasticsearch [ec-extension-guide-upgrade-elasticsearch] + +When you upgrade Elasticsearch in a deployment, you must ensure that: + +* Bundles are on versions that are compatible with the Elasticsearch version that you are upgrading to. 
+* Plugins match (exactly) the Elasticsearch upgrade version. + +**To prepare existing bundle and update the plan:** + +1. **Update the bundle version to be compatible with the Elasticsearch upgrade version.** + + Bundles using wildcard notation for versions (for example, `7.*`, `8.*`) in their extension metadata are compatible with all minor versions of the same Elasticsearch major version. In other words, if you are performing a patch (for example, from `8.4.2` to `8.4.3`) or a minor (for example `8.3.0` to `8.4.3`) version upgrade of Elasticsearch and you are already using `8.*` as the `version` for the extension, you are ready for the Elasticsearch upgrade and can proceed to Step 2. + + However, if you are using a specific `version` for bundles, or upgrading to a major version, you must update the metadata of the extension to specify the matching Elasticsearch `version` that you are upgrading to, or use the wildcard syntax described in the previous paragraph. For example, if you are upgrading from version 7.x to 8.x, set `version` to `8.*` before the upgrade. Refer to [Update the version of an existing bundle](ec-plugins-guide.md#ec-extension-guide-update-version-bundle). + +2. **Update the bundle reference as part of an upgrade plan.** + + Submit a plan change that performs the following operations in a *single* [update deployment API](https://www.elastic.co/docs/api/doc/cloud/operation/operation-update-deployment) call: + + * Upgrade the version of Elasticsearch to the upgrade version (for example, `8.4.3`). + * Update reference to the existing bundle to be compatible with Elasticsearch upgrade version (for example, `8.*`). + + This triggers a rolling upgrade plan change to the later Elasticsearch version and updates the reference to the bundle at the same time. + + The following example shows the upgrade of an Elasticsearch deployment and its bundle. You can also upgrade other deployment resources within the same plan change. + + Update `resources.elasticsearch.plan.elasticsearch.version` and `resources.elasticsearch.plan.cluster_topology.elasticsearch.user_bundles.elasticsearch_version` accordingly. + + ```sh + { + "name": "Extensions", + "prune_orphans": false, + "resources": { + "elasticsearch": [ + { + "region": "gcp-us-central1", + "ref_id": "main-elasticsearch", + "plan": { + "cluster_topology": [ + ... + ], + "elasticsearch": { + "version": "8.4.3", + "enabled_built_in_plugins": [], + "user_bundles": [ + { + "elasticsearch_version": "7.*", + "name": "custom-bundle", + "url": "repo://5886113212" + } + ] + + }, + "deployment_template": { + "id": "gcp-storage-optimized-v3" + } + } + } + ] + } + } + ``` + + +**To create a new plugin and update the plan:** + +Unlike bundles, plugins *must* match the Elasticsearch version down to the patch level (for example, `8.4.3`). When upgrading Elasticsearch to a new patch, minor, or major version, update the version in the extension metadata and update the extension file. The following example updates an existing plugin and upgrades the Elasticsearch deployment from version 8.3.0 to 8.4.3. + +1. **Create a new plugin that matches the Elasticsearch upgrade version.** + + Follow the steps in [Get an extension](ec-plugins-guide.md#ec-extension-guide-get-extension) to create a new extension with a `version` metadata field and the plugin’s `elasticsearch.version` field in `plugin-descriptor.properties` that matches the Elasticsearch upgrade version (for example, `8.4.3`). + +2. 
**Remove the old plugin and add the new plugin to the upgrade plan.** + + Submit a plan change that performs the following operations in a *single* [update deployment API](https://www.elastic.co/docs/api/doc/cloud/operation/operation-update-deployment) call: + + * Upgrade the version of Elasticsearch to the upgrade version (for example, `8.4.3`). + * Remove reference to the the plugin on the older version (for example, `8.3.0`) from the plan. + * Add reference to the new plugin on the upgrade version (for example, `8.4.3`) to the plan. + + This triggers a rolling upgrade plan change to the later Elasticsearch version, removes reference to the older plugin, and deploys your updated plugin at the same time. + + The following example shows the upgrade of an Elasticsearch deployment and its plugin. You can also upgrade other deployment resources within the same plan change. + + Update deployment plans, update `resources.elasticsearch.plan.elasticsearch.version` and `resources.elasticsearch.plan.cluster_topology.elasticsearch.user_plugins.elasticsearch_version` accordingly. + + ```sh + { + "name": "Extensions", + "prune_orphans": false, + "resources": { + "elasticsearch": [ + { + "region": "gcp-us-central1", + "ref_id": "main-elasticsearch", + "plan": { + "cluster_topology": [ + ... + ], + "elasticsearch": { + "version": "8.4.3", + "enabled_built_in_plugins": [], + "user_plugins": [ + { + "elasticsearch_version": "8.4.3", + "name": "custom-plugin", + "url": "repo://4226448541" + } + ] + + }, + "deployment_template": { + "id": "gcp-storage-optimized-v3" + } + } + } + ] + } + } + ``` + + You can use the [cat plugins API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cat-plugins) to confirm that the plugin has been upgraded successfully to Elasticsearch. + + + +## Delete an extension [ec-extension-guide-delete] + +You can delete an extension simply by calling a DELETE against the EXTENSION_ID of interest: + +```sh +curl -X DELETE \ + https://api.elastic-cloud.com/api/v1/deployments/extensions/EXTENSION_ID \ + -H "Authorization: ApiKey $CLOUD_API_KEY" \ + -H 'Content-Type: application/json' +``` + +Only extensions not currently referenced in a deployment plan can be deleted. If you attempt to delete an extension that is in use, you will receive an HTTP 400 Bad Request error like the following, indicating the deployments that are currently using the extension. + +```sh +{ + "errors": [ + { + "message": "Cannot delete extension [EXTENSION_ID]. It is used by deployments [DEPLOYMENT_NAME].", + "code": "extensions.extension_in_use" + } + ] +} +``` + +To remove an extension reference from a deployment plan, simply update the deployment with the extension reference deleted from the `user_plugins` or `user_bundles` arrays. Check [Add an extension to a deployment plan](ec-plugins-guide.md#ec-extension-guide-add-plan) for where these are specified in the plan. + diff --git a/docs/reference/elasticsearch-plugins/discovery-azure-classic-long.md b/docs/reference/elasticsearch-plugins/discovery-azure-classic-long.md new file mode 100644 index 0000000000000..b59f22f42df9c --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-azure-classic-long.md @@ -0,0 +1,220 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-azure-classic-long.html +--- + +# Setup process for Azure Discovery [discovery-azure-classic-long] + +We will expose here one strategy which is to hide our Elasticsearch cluster from outside. 
+ +With this strategy, only VMs behind the same virtual port can talk to each other. That means that with this mode, you can use Elasticsearch unicast discovery to build a cluster, using the Azure API to retrieve information about your nodes. + +## Prerequisites [discovery-azure-classic-long-prerequisites] + +Before starting, you need to have: + +* A [Windows Azure account](https://azure.microsoft.com/en-us/) +* OpenSSL that isn’t from MacPorts, specifically `OpenSSL 1.0.1f 6 Jan 2014` doesn’t seem to create a valid keypair for ssh. FWIW, `OpenSSL 1.0.1c 10 May 2012` on Ubuntu 14.04 LTS is known to work. +* SSH keys and certificate + + You should follow [this guide](http://azure.microsoft.com/en-us/documentation/articles/linux-use-ssh-key/) to learn how to create or use existing SSH keys. If you have already done it, you can skip the following. + + Here is a description on how to generate SSH keys using `openssl`: + + ```sh + # You may want to use another dir than /tmp + cd /tmp + openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout azure-private.key -out azure-certificate.pem + chmod 600 azure-private.key azure-certificate.pem + openssl x509 -outform der -in azure-certificate.pem -out azure-certificate.cer + ``` + + Generate a keystore which will be used by the plugin to authenticate with a certificate all Azure API calls. + + ```sh + # Generate a keystore (azurekeystore.pkcs12) + # Transform private key to PEM format + openssl pkcs8 -topk8 -nocrypt -in azure-private.key -inform PEM -out azure-pk.pem -outform PEM + # Transform certificate to PEM format + openssl x509 -inform der -in azure-certificate.cer -out azure-cert.pem + cat azure-cert.pem azure-pk.pem > azure.pem.txt + # You MUST enter a password! + openssl pkcs12 -export -in azure.pem.txt -out azurekeystore.pkcs12 -name azure -noiter -nomaciter + ``` + + Upload the `azure-certificate.cer` file both in the Elasticsearch Cloud Service (under `Manage Certificates`), and under `Settings -> Manage Certificates`. + + ::::{important} + When prompted for a password, you need to enter a non empty one. + :::: + + + See this [guide](http://www.windowsazure.com/en-us/manage/linux/how-to-guides/ssh-into-linux/) for more details about how to create keys for Azure. + + Once done, you need to upload your certificate in Azure: + + * Go to the [management console](https://account.windowsazure.com/). + * Sign in using your account. + * Click on `Portal`. + * Go to Settings (bottom of the left list) + * On the bottom bar, click on `Upload` and upload your `azure-certificate.cer` file. + + You may want to use [Windows Azure Command-Line Tool](http://www.windowsazure.com/en-us/develop/nodejs/how-to-guides/command-line-tools/): + +* Install [NodeJS](https://github.com/joyent/node/wiki/Installing-Node.js-via-package-manager), for example using homebrew on MacOS X: + + ```sh + brew install node + ``` + +* Install Azure tools + + ```sh + sudo npm install azure-cli -g + ``` + +* Download and import your azure settings: + + ```sh + # This will open a browser and will download a .publishsettings file + azure account download + + # Import this file (we have downloaded it to /tmp) + # Note, it will create needed files in ~/.azure. You can remove azure.publishsettings when done. + azure account import /tmp/azure.publishsettings + ``` + + + +## Creating your first instance [discovery-azure-classic-long-instance] + +You need to have a storage account available. 
Check [Azure Blob Storage documentation](http://www.windowsazure.com/en-us/develop/net/how-to-guides/blob-storage/#create-account) for more information. + +You will need to choose the operating system you want to run on. To get a list of official available images, run: + +```sh +azure vm image list +``` + +Let’s say we are going to deploy an Ubuntu image on an extra small instance in West Europe: + +Azure cluster name +: `azure-elasticsearch-cluster` + +Image +: `b39f27a8b8c64d52b05eac6a62ebad85__Ubuntu-13_10-amd64-server-20130808-alpha3-en-us-30GB` + +VM Name +: `myesnode1` + +VM Size +: `extrasmall` + +Location +: `West Europe` + +Login +: `elasticsearch` + +Password +: `password1234!!` + +Using command line: + +```sh +azure vm create azure-elasticsearch-cluster \ + b39f27a8b8c64d52b05eac6a62ebad85__Ubuntu-13_10-amd64-server-20130808-alpha3-en-us-30GB \ + --vm-name myesnode1 \ + --location "West Europe" \ + --vm-size extrasmall \ + --ssh 22 \ + --ssh-cert /tmp/azure-certificate.pem \ + elasticsearch password1234\!\! +``` + +You should see something like: + +```text +info: Executing command vm create ++ Looking up image ++ Looking up cloud service ++ Creating cloud service ++ Retrieving storage accounts ++ Configuring certificate ++ Creating VM +info: vm create command OK +``` + +Now, your first instance is started. + +::::{admonition} Working with SSH +:class: tip + +You need to give the private key and username each time you log on your instance: + +```sh +ssh -i ~/.ssh/azure-private.key elasticsearch@myescluster.cloudapp.net +``` + +But you can also define it once in `~/.ssh/config` file: + +```text +Host *.cloudapp.net + User elasticsearch + StrictHostKeyChecking no + UserKnownHostsFile=/dev/null + IdentityFile ~/.ssh/azure-private.key +``` + +:::: + + +Next, you need to install Elasticsearch on your new instance. First, copy your keystore to the instance, then connect to the instance using SSH: + +```sh +scp /tmp/azurekeystore.pkcs12 azure-elasticsearch-cluster.cloudapp.net:/home/elasticsearch +ssh azure-elasticsearch-cluster.cloudapp.net +``` + +Once connected, [install {{es}}](docs-content://deploy-manage/deploy/self-managed/installing-elasticsearch.md). + + +## Install Elasticsearch cloud azure plugin [discovery-azure-classic-long-plugin] + +```sh +# Install the plugin +sudo /usr/share/elasticsearch/bin/elasticsearch-plugin install discovery-azure-classic + +# Configure it +sudo vi /etc/elasticsearch/elasticsearch.yml +``` + +And add the following lines: + +```yaml +# If you don't remember your account id, you may get it with `azure account list` +cloud: + azure: + management: + subscription.id: your_azure_subscription_id + cloud.service.name: your_azure_cloud_service_name + keystore: + path: /home/elasticsearch/azurekeystore.pkcs12 + password: your_password_for_keystore + +discovery: + type: azure + +# Recommended (warning: non durable disk) +# path.data: /mnt/resource/elasticsearch/data +``` + +Start Elasticsearch: + +```sh +sudo systemctl start elasticsearch +``` + +If anything goes wrong, check your logs in `/var/log/elasticsearch`. 
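Once the service is running, a quick way to confirm that the node started correctly (and, after you add more nodes, that Azure discovery is finding them) is to query the local HTTP endpoint from the instance. This check is a small addition to the walkthrough above and uses only standard {{es}} APIs:

```sh
# Basic liveness check against the local node
curl -X GET "localhost:9200/"

# Lists the nodes that have joined the cluster; additional nodes should
# appear here once discovery is working
curl -X GET "localhost:9200/_cat/nodes?v"
```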
+ + diff --git a/docs/reference/elasticsearch-plugins/discovery-azure-classic-scale.md b/docs/reference/elasticsearch-plugins/discovery-azure-classic-scale.md new file mode 100644 index 0000000000000..6e46c2fdb8186 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-azure-classic-scale.md @@ -0,0 +1,63 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-azure-classic-scale.html +--- + +# Scaling out! [discovery-azure-classic-scale] + +You need first to create an image of your previous machine. Disconnect from your machine and run locally the following commands: + +```sh +# Shutdown the instance +azure vm shutdown myesnode1 + +# Create an image from this instance (it could take some minutes) +azure vm capture myesnode1 esnode-image --delete + +# Note that the previous instance has been deleted (mandatory) +# So you need to create it again and BTW create other instances. + +azure vm create azure-elasticsearch-cluster \ + esnode-image \ + --vm-name myesnode1 \ + --location "West Europe" \ + --vm-size extrasmall \ + --ssh 22 \ + --ssh-cert /tmp/azure-certificate.pem \ + elasticsearch password1234\!\! +``` + +::::{tip} +It could happen that azure changes the endpoint public IP address. DNS propagation could take some minutes before you can connect again using name. You can get from azure the IP address if needed, using: + +```sh +# Look at Network `Endpoints 0 Vip` +azure vm show myesnode1 +``` + +:::: + + +Let’s start more instances! + +```sh +for x in $(seq 2 10) + do + echo "Launching azure instance #$x..." + azure vm create azure-elasticsearch-cluster \ + esnode-image \ + --vm-name myesnode$x \ + --vm-size extrasmall \ + --ssh $((21 + $x)) \ + --ssh-cert /tmp/azure-certificate.pem \ + --connect \ + elasticsearch password1234\!\! + done +``` + +If you want to remove your running instances: + +```sh +azure vm delete myesnode1 +``` + diff --git a/docs/reference/elasticsearch-plugins/discovery-azure-classic-usage.md b/docs/reference/elasticsearch-plugins/discovery-azure-classic-usage.md new file mode 100644 index 0000000000000..caf49d803b8bb --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-azure-classic-usage.md @@ -0,0 +1,97 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-azure-classic-usage.html +--- + +# Azure Virtual Machine discovery [discovery-azure-classic-usage] + +Azure VM discovery allows to use the Azure APIs to perform automatic discovery. Here is a simple sample configuration: + +```yaml +cloud: + azure: + management: + subscription.id: XXX-XXX-XXX-XXX + cloud.service.name: es-demo-app + keystore: + path: /path/to/azurekeystore.pkcs12 + password: WHATEVER + type: pkcs12 + +discovery: + seed_providers: azure +``` + +::::{admonition} Binding the network host +:class: important + +The keystore file must be placed in a directory accessible by Elasticsearch like the `config` directory. + +It’s important to define `network.host` as by default it’s bound to `localhost`. + +You can use [core network host settings](/reference/elasticsearch/configuration-reference/networking-settings.md). For example `_en0_`. 
+ +:::: + + +## How to start (short story) [discovery-azure-classic-short] + +* Create Azure instances +* Install Elasticsearch +* Install Azure plugin +* Modify `elasticsearch.yml` file +* Start Elasticsearch + + +## Azure credential API settings [discovery-azure-classic-settings] + +The following are a list of settings that can further control the credential API: + +`cloud.azure.management.keystore.path` +: /path/to/keystore + +`cloud.azure.management.keystore.type` +: `pkcs12`, `jceks` or `jks`. Defaults to `pkcs12`. + +`cloud.azure.management.keystore.password` +: your_password for the keystore + +`cloud.azure.management.subscription.id` +: your_azure_subscription_id + +`cloud.azure.management.cloud.service.name` +: your_azure_cloud_service_name. This is the cloud service name/DNS but without the `cloudapp.net` part. So if the DNS name is `abc.cloudapp.net` then the `cloud.service.name` to use is just `abc`. + + +## Advanced settings [discovery-azure-classic-settings-advanced] + +The following are a list of settings that can further control the discovery: + +`discovery.azure.host.type` +: Either `public_ip` or `private_ip` (default). Azure discovery will use the one you set to ping other nodes. + +`discovery.azure.endpoint.name` +: When using `public_ip` this setting is used to identify the endpoint name used to forward requests to Elasticsearch (aka transport port name). Defaults to `elasticsearch`. In Azure management console, you could define an endpoint `elasticsearch` forwarding for example requests on public IP on port 8100 to the virtual machine on port 9300. + +`discovery.azure.deployment.name` +: Deployment name if any. Defaults to the value set with `cloud.azure.management.cloud.service.name`. + +`discovery.azure.deployment.slot` +: Either `staging` or `production` (default). + +For example: + +```yaml +discovery: + type: azure + azure: + host: + type: private_ip + endpoint: + name: elasticsearch + deployment: + name: your_azure_cloud_service_name + slot: production +``` + + diff --git a/docs/reference/elasticsearch-plugins/discovery-azure-classic.md b/docs/reference/elasticsearch-plugins/discovery-azure-classic.md new file mode 100644 index 0000000000000..4e8f042c2845a --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-azure-classic.md @@ -0,0 +1,48 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-azure-classic.html +--- + +# Azure Classic discovery plugin [discovery-azure-classic] + +The Azure Classic Discovery plugin uses the Azure Classic API to identify the addresses of seed hosts. + +::::{admonition} Deprecated in 5.0.0. +:class: warning + +This plugin will be removed in the future +:::: + + + +## Installation [discovery-azure-classic-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install discovery-azure-classic +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/discovery-azure-classic/discovery-azure-classic-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/discovery-azure-classic/discovery-azure-classic-9.0.0-beta1.zip). 
To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/discovery-azure-classic/discovery-azure-classic-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/discovery-azure-classic/discovery-azure-classic-9.0.0-beta1.zip.asc). + + +## Removal [discovery-azure-classic-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove discovery-azure-classic +``` + +The node must be stopped before removing the plugin. + + + + diff --git a/docs/reference/elasticsearch-plugins/discovery-ec2-usage.md b/docs/reference/elasticsearch-plugins/discovery-ec2-usage.md new file mode 100644 index 0000000000000..1119649bafd1f --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-ec2-usage.md @@ -0,0 +1,158 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-ec2-usage.html +--- + +# Using the EC2 discovery plugin [discovery-ec2-usage] + +The `discovery-ec2` plugin allows {{es}} to find the master-eligible nodes in a cluster running on AWS EC2 by querying the [AWS API](https://github.com/aws/aws-sdk-java) for the addresses of the EC2 instances running these nodes. + +It is normally a good idea to restrict the discovery process just to the master-eligible nodes in the cluster. This plugin allows you to identify these nodes by certain criteria including their tags, their membership of security groups, and their placement within availability zones. The discovery process will work correctly even if it finds master-ineligible nodes, but master elections will be more efficient if this can be avoided. + +The interaction with the AWS API can be authenticated using the [instance role](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.md), or else custom credentials can be supplied. + +## Enabling EC2 discovery [_enabling_ec2_discovery] + +To enable EC2 discovery, configure {{es}} to use the `ec2` seed hosts provider: + +```yaml +discovery.seed_providers: ec2 +``` + + +## Configuring EC2 discovery [_configuring_ec2_discovery] + +EC2 discovery supports a number of settings. Some settings are sensitive and must be stored in the [{{es}} keystore](docs-content://deploy-manage/security/secure-settings.md). For example, to authenticate using a particular access key and secret key, add these keys to the keystore by running the following commands: + +```sh +bin/elasticsearch-keystore add discovery.ec2.access_key +bin/elasticsearch-keystore add discovery.ec2.secret_key +``` + +The available settings for the EC2 discovery plugin are as follows. + +`discovery.ec2.access_key` ({{ref}}/secure-settings.html[Secure], [reloadable](docs-content://deploy-manage/security/secure-settings.md#reloadable-secure-settings)) +: An EC2 access key. If set, you must also set `discovery.ec2.secret_key`. If unset, `discovery-ec2` will instead use the instance role. This setting is sensitive and must be stored in the {{es}} keystore. + +`discovery.ec2.secret_key` ({{ref}}/secure-settings.html[Secure], [reloadable](docs-content://deploy-manage/security/secure-settings.md#reloadable-secure-settings)) +: An EC2 secret key. If set, you must also set `discovery.ec2.access_key`. This setting is sensitive and must be stored in the {{es}} keystore. 
+ +`discovery.ec2.session_token` ({{ref}}/secure-settings.html[Secure], [reloadable](docs-content://deploy-manage/security/secure-settings.md#reloadable-secure-settings)) +: An EC2 session token. If set, you must also set `discovery.ec2.access_key` and `discovery.ec2.secret_key`. This setting is sensitive and must be stored in the {{es}} keystore. + +`discovery.ec2.endpoint` +: The EC2 service endpoint to which to connect. See [https://docs.aws.amazon.com/general/latest/gr/rande.html#ec2_region](https://docs.aws.amazon.com/general/latest/gr/rande.md#ec2_region) to find the appropriate endpoint for the region. This setting defaults to `ec2.us-east-1.amazonaws.com` which is appropriate for clusters running in the `us-east-1` region. + +`discovery.ec2.protocol` +: The protocol to use to connect to the EC2 service endpoint, which may be either `http` or `https`. Defaults to `https`. + +`discovery.ec2.proxy.host` +: The address or host name of an HTTP proxy through which to connect to EC2. If not set, no proxy is used. + +`discovery.ec2.proxy.port` +: When the address of an HTTP proxy is given in `discovery.ec2.proxy.host`, this setting determines the port to use to connect to the proxy. Defaults to `80`. + +`discovery.ec2.proxy.scheme` +: The scheme to use when connecting to the EC2 service endpoint through proxy specified in `discovery.ec2.proxy.host`. Valid values are `http` or `https`. Defaults to `http`. + +`discovery.ec2.proxy.username` ({{ref}}/secure-settings.html[Secure], [reloadable](docs-content://deploy-manage/security/secure-settings.md#reloadable-secure-settings)) +: When the address of an HTTP proxy is given in `discovery.ec2.proxy.host`, this setting determines the username to use to connect to the proxy. When not set, no username is used. This setting is sensitive and must be stored in the {{es}} keystore. + +`discovery.ec2.proxy.password` ({{ref}}/secure-settings.html[Secure], [reloadable](docs-content://deploy-manage/security/secure-settings.md#reloadable-secure-settings)) +: When the address of an HTTP proxy is given in `discovery.ec2.proxy.host`, this setting determines the password to use to connect to the proxy. When not set, no password is used. This setting is sensitive and must be stored in the {{es}} keystore. + +`discovery.ec2.read_timeout` +: The socket timeout for connections to EC2, [including the units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). For example, a value of `60s` specifies a 60-second timeout. Defaults to 50 seconds. + +`discovery.ec2.groups` +: A list of the names or IDs of the security groups to use for discovery. The `discovery.ec2.any_group` setting determines the behaviour of this setting. Defaults to an empty list, meaning that security group membership is ignored by EC2 discovery. + +`discovery.ec2.any_group` +: Defaults to `true`, meaning that instances belonging to *any* of the security groups specified in `discovery.ec2.groups` will be used for discovery. If set to `false`, only instances that belong to *all* of the security groups specified in `discovery.ec2.groups` will be used for discovery. + +`discovery.ec2.host_type` +: Each EC2 instance has a number of different addresses that might be suitable for discovery. This setting allows you to select which of these addresses is used by the discovery process. It can be set to one of `private_ip`, `public_ip`, `private_dns`, `public_dns` or `tag:TAGNAME` where `TAGNAME` refers to a name of a tag. This setting defaults to `private_ip`. 
+ +If you set `discovery.ec2.host_type` to a value of the form `tag:TAGNAME` then the value of the tag `TAGNAME` attached to each instance will be used as that instance’s address for discovery. Instances which do not have this tag set will be ignored by the discovery process. + +For example if you tag some EC2 instances with a tag named `elasticsearch-host-name` and set `host_type: tag:elasticsearch-host-name` then the `discovery-ec2` plugin will read each instance’s host name from the value of the `elasticsearch-host-name` tag. [Read more about EC2 Tags](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.md). + + +`discovery.ec2.availability_zones` +: A list of the names of the availability zones to use for discovery. The name of an availability zone is the [region code followed by a letter](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.md), such as `us-east-1a`. Only instances placed in one of the given availability zones will be used for discovery. + +$$$discovery-ec2-filtering$$$ + +`discovery.ec2.tag.TAGNAME` +: A list of the values of a tag called `TAGNAME` to use for discovery. If set, only instances that are tagged with one of the given values will be used for discovery. For instance, the following settings will only use nodes with a `role` tag set to `master` and an `environment` tag set to either `dev` or `staging`. + +```yaml +discovery.ec2.tag.role: master +discovery.ec2.tag.environment: dev,staging +``` + +::::{note} +The names of tags used for discovery may only contain ASCII letters, numbers, hyphens and underscores. In particular you cannot use tags whose name includes a colon. +:::: + + + +`discovery.ec2.node_cache_time` +: Sets the length of time for which the collection of discovered instances is cached. {{es}} waits at least this long between requests for discovery information from the EC2 API. AWS may reject discovery requests if they are made too often, and this would cause discovery to fail. Defaults to `10s`. + +All **secure** settings of this plugin are [reloadable](docs-content://deploy-manage/security/secure-settings.md#reloadable-secure-settings), allowing you to update the secure settings for this plugin without needing to restart each node. + + +## Recommended EC2 permissions [discovery-ec2-permissions] + +The `discovery-ec2` plugin works by making a `DescribeInstances` call to the AWS EC2 API. You must configure your AWS account to allow this, which is normally done using an IAM policy. You can create a custom policy via the IAM Management Console. It should look similar to this. + +```js +{ + "Statement": [ + { + "Action": [ + "ec2:DescribeInstances" + ], + "Effect": "Allow", + "Resource": [ + "*" + ] + } + ], + "Version": "2012-10-17" +} +``` + + +## Automatic node attributes [discovery-ec2-attributes] + +The `discovery-ec2` plugin can automatically set the `aws_availability_zone` node attribute to the availability zone of each node. This node attribute allows you to ensure that each shard has copies allocated redundantly across multiple availability zones by using the [Allocation Awareness](docs-content://deploy-manage/distributed-architecture/shard-allocation-relocation-recovery/shard-allocation-awareness.md) feature. + +In order to enable the automatic definition of the `aws_availability_zone` attribute, set `cloud.node.auto_attributes` to `true`. 
For example: + +```yaml +cloud.node.auto_attributes: true +cluster.routing.allocation.awareness.attributes: aws_availability_zone +``` + +The `aws_availability_zone` attribute can be automatically set like this when using any discovery type. It is not necessary to set `discovery.seed_providers: ec2`. However, this feature does require that the `discovery-ec2` plugin is installed. + + +## Binding to the correct address [discovery-ec2-network-host] + +It is important to define `network.host` correctly when deploying a cluster on EC2. By default each {{es}} node only binds to `localhost`, which will prevent it from being discovered by nodes running on any other instances. + +You can use the [core network host settings](/reference/elasticsearch/configuration-reference/networking-settings.md) to bind each node to the desired address, or you can set `network.host` to one of the following EC2-specific settings provided by the `discovery-ec2` plugin: + +| EC2 Host Value | Description | +| --- | --- | +| `_ec2:privateIpv4_` | The private IP address (ipv4) of the machine. | +| `_ec2:privateDns_` | The private host of the machine. | +| `_ec2:publicIpv4_` | The public IP address (ipv4) of the machine. | +| `_ec2:publicDns_` | The public host of the machine. | +| `_ec2:privateIp_` | Equivalent to `_ec2:privateIpv4_`. | +| `_ec2:publicIp_` | Equivalent to `_ec2:publicIpv4_`. | +| `_ec2_` | Equivalent to `_ec2:privateIpv4_`. | + +These values are acceptable when using any discovery type. They do not require you to set `discovery.seed_providers: ec2`. However, they do require that the `discovery-ec2` plugin is installed. diff --git a/docs/reference/elasticsearch-plugins/discovery-ec2.md b/docs/reference/elasticsearch-plugins/discovery-ec2.md new file mode 100644 index 0000000000000..6bd51772424dc --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-ec2.md @@ -0,0 +1,42 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-ec2.html +--- + +# EC2 Discovery plugin [discovery-ec2] + +The EC2 discovery plugin provides a list of seed addresses to the [discovery process](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/discovery-hosts-providers.md) by querying the [AWS API](https://github.com/aws/aws-sdk-java) for a list of EC2 instances matching certain criteria determined by the [plugin settings](/reference/elasticsearch-plugins/discovery-ec2-usage.md). + +**If you are looking for a hosted solution of {{es}} on AWS, please visit [https://www.elastic.co/cloud](https://www.elastic.co/cloud).** + + +## Installation [discovery-ec2-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install discovery-ec2 +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/discovery-ec2/discovery-ec2-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/discovery-ec2/discovery-ec2-9.0.0-beta1.zip).
To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/discovery-ec2/discovery-ec2-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/discovery-ec2/discovery-ec2-9.0.0-beta1.zip.asc). + + +## Removal [discovery-ec2-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove discovery-ec2 +``` + +The node must be stopped before removing the plugin. + + + diff --git a/docs/reference/elasticsearch-plugins/discovery-gce-network-host.md b/docs/reference/elasticsearch-plugins/discovery-gce-network-host.md new file mode 100644 index 0000000000000..0d800835c146b --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-gce-network-host.md @@ -0,0 +1,35 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-gce-network-host.html +--- + +# GCE Network Host [discovery-gce-network-host] + +When the `discovery-gce` plugin is installed, the following are also allowed as valid network host settings: + +| GCE Host Value | Description | +| --- | --- | +| `_gce:privateIp:X_` | The private IP address of the machine for a given network interface. | +| `_gce:hostname_` | The hostname of the machine. | +| `_gce_` | Same as `_gce:privateIp:0_` (recommended). | + +Examples: + +```yaml +# get the IP address from network interface 1 +network.host: _gce:privateIp:1_ +# Using GCE internal hostname +network.host: _gce:hostname_ +# shortcut for _gce:privateIp:0_ (recommended) +network.host: _gce_ +``` + +## How to start (short story) [discovery-gce-usage-short] + +* Create Google Compute Engine instance (with compute rw permissions) +* Install Elasticsearch +* Install Google Compute Engine Cloud plugin +* Modify `elasticsearch.yml` file +* Start Elasticsearch + + diff --git a/docs/reference/elasticsearch-plugins/discovery-gce-usage-cloning.md b/docs/reference/elasticsearch-plugins/discovery-gce-usage-cloning.md new file mode 100644 index 0000000000000..8af0fa1335c01 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-gce-usage-cloning.md @@ -0,0 +1,62 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-gce-usage-cloning.html +--- + +# Cloning your existing machine [discovery-gce-usage-cloning] + +In order to build a cluster on many nodes, you can clone your configured instance to new nodes. You won’t have to reinstall everything! + +First create an image of your running instance and upload it to Google Cloud Storage: + +```sh +# Create an image of your current instance +sudo /usr/bin/gcimagebundle -d /dev/sda -o /tmp/ + +# An image has been created in `/tmp` directory: +ls /tmp +e4686d7f5bf904a924ae0cfeb58d0827c6d5b966.image.tar.gz + +# Upload your image to Google Cloud Storage: +# Create a bucket to hold your image, let's say `esimage`: +gsutil mb gs://esimage + +# Copy your image to this bucket: +gsutil cp /tmp/e4686d7f5bf904a924ae0cfeb58d0827c6d5b966.image.tar.gz gs://esimage + +# Then add your image to images collection: +gcloud compute images create elasticsearch-2-0-0 --source-uri gs://esimage/e4686d7f5bf904a924ae0cfeb58d0827c6d5b966.image.tar.gz + +# If the previous command did not work for you, logout from your instance +# and launch the same command from your local machine. 
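+
+# Optionally, confirm the image is now registered in your project before
+# creating instances from it (the name should match the one used above):
+gcloud compute images list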
+``` + +## Start new instances [discovery-gce-usage-start-new-instances] + +As you have now an image, you can create as many instances as you need: + +```sh +# Just change node name (here myesnode2) +gcloud compute instances create myesnode2 --image elasticsearch-2-0-0 --zone europe-west1-a + +# If you want to provide all details directly, you can use: +gcloud compute instances create myesnode2 --image=elasticsearch-2-0-0 \ + --zone europe-west1-a --machine-type f1-micro --scopes=compute-rw +``` + + +## Remove an instance (aka shut it down) [discovery-gce-usage-remove-instance] + +You can use [Google Cloud Console](https://cloud.google.com/console) or CLI to manage your instances: + +```sh +# Stopping and removing instances +gcloud compute instances delete myesnode1 myesnode2 \ + --zone=europe-west1-a + +# Consider removing disk as well if you don't need them anymore +gcloud compute disks delete boot-myesnode1 boot-myesnode2 \ + --zone=europe-west1-a +``` + + diff --git a/docs/reference/elasticsearch-plugins/discovery-gce-usage-long.md b/docs/reference/elasticsearch-plugins/discovery-gce-usage-long.md new file mode 100644 index 0000000000000..ed70da9c872c1 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-gce-usage-long.md @@ -0,0 +1,128 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-gce-usage-long.html +--- + +# Setting up GCE Discovery [discovery-gce-usage-long] + +## Prerequisites [discovery-gce-usage-long-prerequisites] + +Before starting, you need: + +* Your project ID, e.g. `es-cloud`. Get it from [Google API Console](https://code.google.com/apis/console/). +* To install [Google Cloud SDK](https://developers.google.com/cloud/sdk/) + +If you did not set it yet, you can define your default project you will work on: + +```sh +gcloud config set project es-cloud +``` + + +## Login to Google Cloud [discovery-gce-usage-long-login] + +If you haven’t already, login to Google Cloud + +```sh +gcloud auth login +``` + +This will open your browser. You will be asked to sign-in to a Google account and authorize access to the Google Cloud SDK. + + +## Creating your first instance [discovery-gce-usage-long-first-instance] + +```sh +gcloud compute instances create myesnode1 \ + --zone \ + --scopes compute-rw +``` + +When done, a report like this one should appears: + +```text +Created [https://www.googleapis.com/compute/v1/projects/es-cloud-1070/zones/us-central1-f/instances/myesnode1]. +NAME ZONE MACHINE_TYPE PREEMPTIBLE INTERNAL_IP EXTERNAL_IP STATUS +myesnode1 us-central1-f n1-standard-1 10.240.133.54 104.197.94.25 RUNNING +``` + +You can now connect to your instance: + +```sh +# Connect using google cloud SDK +gcloud compute ssh myesnode1 --zone europe-west1-a + +# Or using SSH with external IP address +ssh -i ~/.ssh/google_compute_engine 192.158.29.199 +``` + +::::{admonition} Service Account Permissions +:class: important + +It’s important when creating an instance that the correct permissions are set. At a minimum, you must ensure you have: + +```text +scopes=compute-rw +``` + +Failing to set this will result in unauthorized messages when starting Elasticsearch. See [Machine Permissions](/reference/elasticsearch-plugins/discovery-gce-usage-tips.md#discovery-gce-usage-tips-permissions). + +:::: + + +Once connected, [install {{es}}](docs-content://deploy-manage/deploy/self-managed/installing-elasticsearch.md). 
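+
+Before going further, you can optionally verify from inside the instance that the `compute-rw` scope was actually granted, using the standard GCE metadata server. This is just a sanity check; `https://www.googleapis.com/auth/compute` is the full form of the `compute-rw` alias:
+
+```sh
+# Run on the instance itself: lists the OAuth scopes granted to the
+# default service account. The output should include
+# https://www.googleapis.com/auth/compute (that is, compute-rw).
+curl -s -H "Metadata-Flavor: Google" \
+  "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/scopes"
+```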
+ + +## Install Elasticsearch discovery gce plugin [discovery-gce-usage-long-install-plugin] + +Install the plugin: + +```sh +# Use Plugin Manager to install it +sudo bin/elasticsearch-plugin install discovery-gce +``` + +Open the `elasticsearch.yml` file: + +```sh +sudo vi /etc/elasticsearch/elasticsearch.yml +``` + +And add the following lines: + +```yaml +cloud: + gce: + project_id: es-cloud + zone: europe-west1-a +discovery: + seed_providers: gce +``` + +Start Elasticsearch: + +```sh +sudo systemctl start elasticsearch +``` + +If anything goes wrong, you should check logs: + +```sh +tail -f /var/log/elasticsearch/elasticsearch.log +``` + +If needed, you can change log level to `trace` by opening `log4j2.properties`: + +```sh +sudo vi /etc/elasticsearch/log4j2.properties +``` + +and adding the following line: + +```yaml +# discovery +logger.discovery_gce.name = discovery.gce +logger.discovery_gce.level = trace +``` + + diff --git a/docs/reference/elasticsearch-plugins/discovery-gce-usage-port.md b/docs/reference/elasticsearch-plugins/discovery-gce-usage-port.md new file mode 100644 index 0000000000000..8528910246e52 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-gce-usage-port.md @@ -0,0 +1,35 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-gce-usage-port.html +--- + +# Changing default transport port [discovery-gce-usage-port] + +By default, Elasticsearch GCE plugin assumes that you run Elasticsearch on 9300 default port. But you can specify the port value Elasticsearch is meant to use using google compute engine metadata `es_port`: + +## When creating instance [discovery-gce-usage-port-create] + +Add `--metadata es_port=9301` option: + +```sh +# when creating first instance +gcloud compute instances create myesnode1 \ + --scopes=compute-rw,storage-full \ + --metadata es_port=9301 + +# when creating an instance from an image +gcloud compute instances create myesnode2 --image=elasticsearch-1-0-0-RC1 \ + --zone europe-west1-a --machine-type f1-micro --scopes=compute-rw \ + --metadata es_port=9301 +``` + + +## On a running instance [discovery-gce-usage-port-run] + +```sh +gcloud compute instances add-metadata myesnode1 \ + --zone europe-west1-a \ + --metadata es_port=9301 +``` + + diff --git a/docs/reference/elasticsearch-plugins/discovery-gce-usage-tags.md b/docs/reference/elasticsearch-plugins/discovery-gce-usage-tags.md new file mode 100644 index 0000000000000..d07a9b2ae6428 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-gce-usage-tags.md @@ -0,0 +1,32 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-gce-usage-tags.html +--- + +# Filtering by tags [discovery-gce-usage-tags] + +The GCE discovery can also filter machines to include in the cluster based on tags using `discovery.gce.tags` settings. For example, setting `discovery.gce.tags` to `dev` will only filter instances having a tag set to `dev`. Several tags set will require all of those tags to be set for the instance to be included. + +One practical use for tag filtering is when a GCE cluster contains many nodes that are not master-eligible {{es}} nodes. In this case, tagging the GCE instances that *are* running the master-eligible {{es}} nodes, and then filtering by that tag, will help discovery to run more efficiently. 
+ +Add your tag when building the new instance: + +```sh +gcloud compute instances create myesnode1 --project=es-cloud \ + --scopes=compute-rw \ + --tags=elasticsearch,dev +``` + +Then, define it in `elasticsearch.yml`: + +```yaml +cloud: + gce: + project_id: es-cloud + zone: europe-west1-a +discovery: + seed_providers: gce + gce: + tags: elasticsearch, dev +``` + diff --git a/docs/reference/elasticsearch-plugins/discovery-gce-usage-testing.md b/docs/reference/elasticsearch-plugins/discovery-gce-usage-testing.md new file mode 100644 index 0000000000000..9edea7b8c7efb --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-gce-usage-testing.md @@ -0,0 +1,26 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-gce-usage-testing.html +--- + +# Testing GCE [discovery-gce-usage-testing] + +Integrations tests in this plugin require working GCE configuration and therefore disabled by default. To enable tests prepare a config file elasticsearch.yml with the following content: + +```yaml +cloud: + gce: + project_id: es-cloud + zone: europe-west1-a +discovery: + seed_providers: gce +``` + +Replace `project_id` and `zone` with your settings. + +To run test: + +```sh +mvn -Dtests.gce=true -Dtests.config=/path/to/config/file/elasticsearch.yml clean test +``` + diff --git a/docs/reference/elasticsearch-plugins/discovery-gce-usage-tips.md b/docs/reference/elasticsearch-plugins/discovery-gce-usage-tips.md new file mode 100644 index 0000000000000..c0347707486f3 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-gce-usage-tips.md @@ -0,0 +1,52 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-gce-usage-tips.html +--- + +# GCE Tips [discovery-gce-usage-tips] + +## Store project id locally [discovery-gce-usage-tips-projectid] + +If you don’t want to repeat the project id each time, you can save it in the local gcloud config + +```sh +gcloud config set project es-cloud +``` + + +## Machine Permissions [discovery-gce-usage-tips-permissions] + +If you have created a machine without the correct permissions, you will see `403 unauthorized` error messages. To change machine permission on an existing instance, first stop the instance then Edit. Scroll down to `Access Scopes` to change permission. The other way to alter these permissions is to delete the instance (NOT THE DISK). Then create another with the correct permissions. + +Creating machines with gcloud +: Ensure the following flags are set: + +```text +--scopes=compute-rw +``` + + +Creating with console (web) +: When creating an instance using the web console, scroll down to **Identity and API access**. + +Select a service account with the correct permissions or choose **Compute Engine default service account** and select **Allow default access** for **Access scopes**. 
+ + +Creating with knife google +: Set the service account scopes when creating the machine: + +```sh +knife google server create www1 \ + -m n1-standard-1 \ + -I debian-8 \ + -Z us-central1-a \ + -i ~/.ssh/id_rsa \ + -x jdoe \ + --gce-service-account-scopes https://www.googleapis.com/auth/compute +``` + +Or, you may use the alias: + +```sh + --gce-service-account-scopes compute-rw +``` diff --git a/docs/reference/elasticsearch-plugins/discovery-gce-usage-zones.md b/docs/reference/elasticsearch-plugins/discovery-gce-usage-zones.md new file mode 100644 index 0000000000000..5f71aab0ea4a9 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-gce-usage-zones.md @@ -0,0 +1,20 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-gce-usage-zones.html +--- + +# Using GCE zones [discovery-gce-usage-zones] + +`cloud.gce.zone` helps to retrieve instances running in a given zone. It should be one of the [GCE supported zones](https://developers.google.com/compute/docs/zones#available). + +The GCE discovery can support multi zones although you need to be aware of network latency between zones. To enable discovery across more than one zone, just enter add your zone list to `cloud.gce.zone` setting: + +```yaml +cloud: + gce: + project_id: + zone: ["", ""] +discovery: + seed_providers: gce +``` + diff --git a/docs/reference/elasticsearch-plugins/discovery-gce-usage.md b/docs/reference/elasticsearch-plugins/discovery-gce-usage.md new file mode 100644 index 0000000000000..16cad1fe33e96 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-gce-usage.md @@ -0,0 +1,51 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-gce-usage.html +--- + +# GCE Virtual Machine discovery [discovery-gce-usage] + +Google Compute Engine VM discovery allows to use the google APIs to perform automatic discovery of seed hosts. Here is a simple sample configuration: + +```yaml +cloud: + gce: + project_id: + zone: +discovery: + seed_providers: gce +``` + +The following gce settings (prefixed with `cloud.gce`) are supported: + +`project_id` +: Your Google project id. By default the project id will be derived from the instance metadata. + + ``` + Note: Deriving the project id from system properties or environment variables + (`GOOGLE_CLOUD_PROJECT` or `GCLOUD_PROJECT`) is not supported. + ``` + + +`zone` +: helps to retrieve instances running in a given zone. It should be one of the [GCE supported zones](https://developers.google.com/compute/docs/zones#available). By default the zone will be derived from the instance metadata. See also [Using GCE zones](/reference/elasticsearch-plugins/discovery-gce-usage-zones.md). + +`retry` +: If set to `true`, client will use [ExponentialBackOff](https://developers.google.com/api-client-library/java/google-http-java-client/backoff) policy to retry the failed http request. Defaults to `true`. + +`max_wait` +: The maximum elapsed time after the client instantiating retry. If the time elapsed goes past the `max_wait`, client stops to retry. A negative value means that it will wait indefinitely. Defaults to `0s` (retry indefinitely). + +`refresh_interval` +: How long the list of hosts is cached to prevent further requests to the GCE API. `0s` disables caching. A negative value will cause infinite caching. Defaults to `0s`. + +::::{admonition} Binding the network host +:class: important + +It’s important to define `network.host` as by default it’s bound to `localhost`. 
+ +You can use [core network host settings](/reference/elasticsearch/configuration-reference/networking-settings.md) or [gce specific host settings](/reference/elasticsearch-plugins/discovery-gce-network-host.md): + +:::: + + diff --git a/docs/reference/elasticsearch-plugins/discovery-gce.md b/docs/reference/elasticsearch-plugins/discovery-gce.md new file mode 100644 index 0000000000000..6eb99dd296eb7 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-gce.md @@ -0,0 +1,47 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery-gce.html +--- + +# GCE Discovery plugin [discovery-gce] + +The Google Compute Engine Discovery plugin uses the GCE API to identify the addresses of seed hosts. + + +## Installation [discovery-gce-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install discovery-gce +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/discovery-gce/discovery-gce-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/discovery-gce/discovery-gce-9.0.0-beta1.zip). To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/discovery-gce/discovery-gce-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/discovery-gce/discovery-gce-9.0.0-beta1.zip.asc). + + +## Removal [discovery-gce-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove discovery-gce +``` + +The node must be stopped before removing the plugin. + + + + + + + + + + diff --git a/docs/reference/elasticsearch-plugins/discovery-plugins.md b/docs/reference/elasticsearch-plugins/discovery-plugins.md new file mode 100644 index 0000000000000..80b91df9db9f7 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/discovery-plugins.md @@ -0,0 +1,26 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/discovery.html +--- + +# Discovery plugins [discovery] + +Discovery plugins extend Elasticsearch by adding new seed hosts providers that can be used to extend the [cluster formation module](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation.md). + + +## Core discovery plugins [_core_discovery_plugins] + +The core discovery plugins are: + +[EC2 discovery](/reference/elasticsearch-plugins/discovery-ec2.md) +: The EC2 discovery plugin uses the [AWS API](https://github.com/aws/aws-sdk-java) to identify the addresses of seed hosts. + +[Azure Classic discovery](/reference/elasticsearch-plugins/discovery-azure-classic.md) +: The Azure Classic discovery plugin uses the Azure Classic API to identify the addresses of seed hosts. + +[GCE discovery](/reference/elasticsearch-plugins/discovery-gce.md) +: The Google Compute Engine discovery plugin uses the GCE API to identify the addresses of seed hosts. 
+ + + + diff --git a/docs/reference/elasticsearch-plugins/index.md b/docs/reference/elasticsearch-plugins/index.md new file mode 100644 index 0000000000000..62f6931136ed7 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/index.md @@ -0,0 +1,27 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/index.html + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/intro.html +--- + +# Elasticsearch plugins [intro] + +Plugins are a way to enhance the core Elasticsearch functionality in a custom manner. They range from adding custom mapping types, custom analyzers, native scripts, custom discovery and more. + +Plugins contain JAR files, but may also contain scripts and config files, and must be installed on every node in the cluster. After installation, each node must be restarted before the plugin becomes visible. + +::::{note} +A full cluster restart is required for installing plugins that have custom cluster state metadata. It is still possible to upgrade such plugins with a rolling restart. +:::: + + +This documentation distinguishes two categories of plugins: + +Core Plugins +: This category identifies plugins that are part of Elasticsearch project. Delivered at the same time as Elasticsearch, their version number always matches the version number of Elasticsearch itself. These plugins are maintained by the Elastic team with the appreciated help of amazing community members (for open source plugins). Issues and bug reports can be reported on the [Github project page](https://github.com/elastic/elasticsearch). + +Community contributed +: This category identifies plugins that are external to the Elasticsearch project. They are provided by individual developers or private companies and have their own licenses as well as their own versioning system. Issues and bug reports can usually be reported on the community plugin’s web site. + +For advice on writing your own plugin, refer to [*Creating an {{es}} plugin*](/extend/index.md). + diff --git a/docs/reference/elasticsearch-plugins/installation.md b/docs/reference/elasticsearch-plugins/installation.md new file mode 100644 index 0000000000000..a7693083334b1 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/installation.md @@ -0,0 +1,26 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/installation.html +--- + +# Installing plugins [installation] + +The documentation for each plugin usually includes specific installation instructions for that plugin, but below we document the various available options: + + +## Core Elasticsearch plugins [_core_elasticsearch_plugins] + +Core Elasticsearch plugins can be installed as follows: + +```shell +sudo bin/elasticsearch-plugin install [plugin_name] +``` + +For instance, to install the core [ICU plugin](/reference/elasticsearch-plugins/analysis-icu.md), just run the following command: + +```shell +sudo bin/elasticsearch-plugin install analysis-icu +``` + +This command will install the version of the plugin that matches your Elasticsearch version and also show a progress bar while downloading. 
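+
+Remember that a newly installed plugin is only picked up after the node has been restarted. If {{es}} was installed from the DEB or RPM packages, the restart might look like the following (assuming the default `elasticsearch` service name; adapt this to however you run {{es}}):
+
+```sh
+sudo systemctl restart elasticsearch
+```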
+ diff --git a/docs/reference/elasticsearch-plugins/installing-multiple-plugins.md b/docs/reference/elasticsearch-plugins/installing-multiple-plugins.md new file mode 100644 index 0000000000000..cecaf12ef5f0c --- /dev/null +++ b/docs/reference/elasticsearch-plugins/installing-multiple-plugins.md @@ -0,0 +1,23 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/installing-multiple-plugins.html +--- + +# Installing multiple plugins [installing-multiple-plugins] + +Multiple plugins can be installed in one invocation as follows: + +```shell +sudo bin/elasticsearch-plugin install [plugin_id] [plugin_id] ... [plugin_id] +``` + +Each `plugin_id` can be any valid form for installing a single plugin (e.g., the name of a core plugin, or a custom URL). + +For instance, to install the core [ICU plugin](/reference/elasticsearch-plugins/analysis-icu.md), run the following command: + +```shell +sudo bin/elasticsearch-plugin install analysis-icu +``` + +This command will install the versions of the plugins that matches your Elasticsearch version. The installation will be treated as a transaction, so that all the plugins will be installed, or none of the plugins will be installed if any installation fails. + diff --git a/docs/reference/elasticsearch-plugins/integrations.md b/docs/reference/elasticsearch-plugins/integrations.md new file mode 100644 index 0000000000000..d830ddb07df15 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/integrations.md @@ -0,0 +1,97 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/integrations.html +--- + +# Integrations [integrations] + +Integrations are not plugins, but are external tools or modules that make it easier to work with Elasticsearch. + + +## CMS integrations [cms-integrations] + + +### Supported by the community: [_supported_by_the_community] + +* [ElasticPress](https://wordpress.org/plugins/elasticpress/): Elasticsearch WordPress Plugin +* [Tiki Wiki CMS Groupware](https://doc.tiki.org/Elasticsearch): Tiki has native support for Elasticsearch. This provides faster & better search (facets, etc), along with some Natural Language Processing features (ex.: More like this) +* [XWiki Next Generation Wiki](https://extensions.xwiki.org/xwiki/bin/view/Extension/Elastic+Search+Macro/): XWiki has an Elasticsearch and Kibana macro allowing to run Elasticsearch queries and display the results in XWiki pages using XWiki’s scripting language as well as include Kibana Widgets in XWiki pages + + +### Supported by Elastic: [_supported_by_elastic] + +* [Logstash output to Elasticsearch](logstash://docs/reference/plugins-outputs-elasticsearch.md): The Logstash `elasticsearch` output plugin. +* [Elasticsearch input to Logstash](logstash://docs/reference/plugins-inputs-elasticsearch.md) The Logstash `elasticsearch` input plugin. +* [Elasticsearch event filtering in Logstash](logstash://docs/reference/plugins-filters-elasticsearch.md) The Logstash `elasticsearch` filter plugin. +* [Elasticsearch bulk codec](logstash://docs/reference/plugins-codecs-es_bulk.md) The Logstash `es_bulk` plugin decodes the Elasticsearch bulk format into individual events. + + +### Supported by the community: [_supported_by_the_community_2] + +* [Ingest processor template](https://github.com/spinscale/cookiecutter-elasticsearch-ingest-processor): A template for creating new ingest processors. 
+* [Kafka Standalone Consumer (Indexer)](https://github.com/BigDataDevs/kafka-elasticsearch-consumer): Kafka Standalone Consumer [Indexer] will read messages from Kafka in batches, processes(as implemented) and bulk-indexes them into Elasticsearch. Flexible and scalable. More documentation in above GitHub repo’s Wiki. +* [Scrutineer](https://github.com/Aconex/scrutineer): A high performance consistency checker to compare what you’ve indexed with your source of truth content (e.g. DB) +* [FS Crawler](https://github.com/dadoonet/fscrawler): The File System (FS) crawler allows to index documents (PDF, Open Office…​) from your local file system and over SSH. (by David Pilato) +* [Elasticsearch Evolution](https://github.com/senacor/elasticsearch-evolution): A library to migrate elasticsearch mappings. +* [PGSync](https://pgsync.com): A tool for syncing data from Postgres to Elasticsearch. + + +## Deployment [deployment] + + +### Supported by the community: [_supported_by_the_community_3] + +* [Ansible](https://github.com/elastic/ansible-elasticsearch): Ansible playbook for Elasticsearch. +* [Puppet](https://github.com/elastic/puppet-elasticsearch): Elasticsearch puppet module. +* [Chef](https://github.com/elastic/cookbook-elasticsearch): Chef cookbook for Elasticsearch + + +## Framework integrations [framework-integrations] + + +### Supported by the community: [_supported_by_the_community_4] + +* [Apache Camel Integration](https://camel.apache.org/components/2.x/elasticsearch-component.md): An Apache camel component to integrate Elasticsearch +* [Catmandu](https://metacpan.org/pod/Catmandu::Store::ElasticSearch): An Elasticsearch backend for the Catmandu framework. +* [FOSElasticaBundle](https://github.com/FriendsOfSymfony/FOSElasticaBundle): Symfony2 Bundle wrapping Elastica. +* [Grails](https://plugins.grails.org/plugin/puneetbehl/elasticsearch): Elasticsearch Grails plugin. +* [Hibernate Search](https://hibernate.org/search/) Integration with Hibernate ORM, from the Hibernate team. Automatic synchronization of write operations, yet exposes full Elasticsearch capabilities for queries. Can return either Elasticsearch native or re-map queries back into managed entities loaded within transactions from the reference database. +* [Spring Data Elasticsearch](https://github.com/spring-projects/spring-data-elasticsearch): Spring Data implementation for Elasticsearch +* [Spring Elasticsearch](https://github.com/dadoonet/spring-elasticsearch): Spring Factory for Elasticsearch +* [Zeebe](https://zeebe.io): An Elasticsearch exporter acts as a bridge between Zeebe and Elasticsearch +* [Apache Pulsar](https://pulsar.apache.org/docs/en/io-elasticsearch): The Elasticsearch Sink Connector is used to pull messages from Pulsar topics and persist the messages to an index. +* [Micronaut Elasticsearch Integration](https://micronaut-projects.github.io/micronaut-elasticsearch/latest/guide/index.md): Integration of Micronaut with Elasticsearch +* [Apache StreamPipes](https://streampipes.apache.org): StreamPipes is a framework that enables users to work with IoT data sources. +* [Apache MetaModel](https://metamodel.apache.org/): Providing a common interface for discovery, exploration of metadata and querying of different types of data sources. +* [Micrometer](https://micrometer.io): Vendor-neutral application metrics facade. Think SLF4j, but for metrics. 
+ + +## Hadoop integrations [hadoop-integrations] + + +### Supported by Elastic: [_supported_by_elastic_2] + +* [es-hadoop](elasticsearch-hadoop://docs/reference/preface.md): Elasticsearch real-time search and analytics natively integrated with Hadoop. Supports Map/Reduce, Cascading, Apache Hive, Apache Pig, Apache Spark and Apache Storm. + + +### Supported by the community: [_supported_by_the_community_5] + +* [Garmadon](https://github.com/criteo/garmadon): Garmadon is a solution for Hadoop Cluster realtime introspection. + + +## Health and Performance Monitoring [monitoring-integrations] + + +### Supported by the community: [_supported_by_the_community_6] + +* [SPM for Elasticsearch](https://sematext.com/spm/index.md): Performance monitoring with live charts showing cluster and node stats, integrated alerts, email reports, etc. +* [Zabbix monitoring template](https://www.zabbix.com/integrations/elasticsearch): Monitor the performance and status of your {{es}} nodes and cluster with Zabbix and receive events information. + + +## Other integrations [other-integrations] + + +### Supported by the community: [_supported_by_the_community_7] + +* [Wireshark](https://www.wireshark.org/): Protocol dissection for HTTP and the transport protocol +* [ItemsAPI](https://www.itemsapi.com/): Search backend for mobile and web diff --git a/docs/reference/elasticsearch-plugins/listing-removing-updating.md b/docs/reference/elasticsearch-plugins/listing-removing-updating.md new file mode 100644 index 0000000000000..917c9bd093960 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/listing-removing-updating.md @@ -0,0 +1,50 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/listing-removing-updating.html +--- + +# Listing, removing and updating installed plugins [listing-removing-updating] + + +## Listing plugins [_listing_plugins] + +A list of the currently loaded plugins can be retrieved with the `list` option: + +```shell +sudo bin/elasticsearch-plugin list +``` + +Alternatively, use the [node-info API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-nodes-info) to find out which plugins are installed on each node in the cluster + + +## Removing plugins [_removing_plugins] + +Plugins can be removed manually, by deleting the appropriate directory under `plugins/`, or using the public script: + +```shell +sudo bin/elasticsearch-plugin remove [pluginname] +``` + +After a Java plugin has been removed, you will need to restart the node to complete the removal process. + +By default, plugin configuration files (if any) are preserved on disk; this is so that configuration is not lost while upgrading a plugin. If you wish to purge the configuration files while removing a plugin, use `-p` or `--purge`. This can option can be used after a plugin is removed to remove any lingering configuration files. + + +## Removing multiple plugins [removing-multiple-plugins] + +Multiple plugins can be removed in one invocation as follows: + +```shell +sudo bin/elasticsearch-plugin remove [pluginname] [pluginname] ... [pluginname] +``` + + +## Updating plugins [_updating_plugins] + +Except for text analysis plugins that are created using the [stable plugin API](/extend/creating-stable-plugins.md), plugins are built for a specific version of {{es}}, and must be reinstalled each time {{es}} is updated. 
+ +```shell +sudo bin/elasticsearch-plugin remove [pluginname] +sudo bin/elasticsearch-plugin install [pluginname] +``` + diff --git a/docs/reference/elasticsearch-plugins/manage-plugins-using-configuration-file.md b/docs/reference/elasticsearch-plugins/manage-plugins-using-configuration-file.md new file mode 100644 index 0000000000000..49252650971a4 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/manage-plugins-using-configuration-file.md @@ -0,0 +1,38 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/manage-plugins-using-configuration-file.html +--- + +# Manage plugins using a configuration file [manage-plugins-using-configuration-file] + +::::{admonition} Docker only +:class: important + +This feature is only available for [official {{es}} Docker images](https://www.docker.elastic.co/). Other {{es}} distributions will not start with a plugin configuration file. + +:::: + + +If you run {{es}} using Docker, you can manage plugins using a declarative configuration file. When {{es}} starts up, it will compare the plugins in the file with those that are currently installed, and add or remove plugins as required. {{es}} will also upgrade official plugins when you upgrade {{es}} itself. + +The file is called `elasticsearch-plugins.yml`, and must be placed in the Elasticsearch configuration directory, alongside `elasticsearch.yml`. Here is an example: + +```yaml +plugins: + - id: analysis-icu + - id: repository-azure + - id: custom-mapper + location: https://example.com/archive/custom-mapper-1.0.0.zip +``` + +This example installs the official `analysis-icu` and `repository-azure` plugins, and one unofficial plugin. Every plugin must provide an `id`. Unofficial plugins must also provide a `location`. This is typically a URL, but Maven coordinates are also supported. The downloaded plugin’s name must match the ID in the configuration file. + +While {{es}} will respect the [standard Java proxy system properties](https://docs.oracle.com/javase/8/docs/technotes/guides/net/proxies.md) when downloading plugins, you can also configure an HTTP proxy to use explicitly in the configuration file. For example: + +```yaml +plugins: + - id: custom-mapper + location: https://example.com/archive/custom-mapper-1.0.0.zip +proxy: proxy.example.com:8443 +``` + diff --git a/docs/reference/elasticsearch-plugins/mandatory-plugins.md b/docs/reference/elasticsearch-plugins/mandatory-plugins.md new file mode 100644 index 0000000000000..4635178cbdf5b --- /dev/null +++ b/docs/reference/elasticsearch-plugins/mandatory-plugins.md @@ -0,0 +1,15 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/mandatory-plugins.html +--- + +# Mandatory plugins [mandatory-plugins] + +If you rely on some plugins, you can define mandatory plugins by adding `plugin.mandatory` setting to the `config/elasticsearch.yml` file, for example: + +```yaml +plugin.mandatory: analysis-icu,lang-js +``` + +For safety reasons, a node will not start if it is missing a mandatory plugin. 
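+
+Before declaring a plugin mandatory, it can be worth confirming that the plugin is actually installed on the node, for example with the plugin manager's `list` option:
+
+```sh
+sudo bin/elasticsearch-plugin list
+```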
+ diff --git a/docs/reference/elasticsearch-plugins/mapper-annotated-text-highlighter.md b/docs/reference/elasticsearch-plugins/mapper-annotated-text-highlighter.md new file mode 100644 index 0000000000000..c3be476424b31 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/mapper-annotated-text-highlighter.md @@ -0,0 +1,48 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/mapper-annotated-text-highlighter.html +--- + +# Using the annotated highlighter [mapper-annotated-text-highlighter] + +The `annotated-text` plugin includes a custom highlighter designed to mark up search hits in a way which is respectful of the original markup: + +```console +# Example documents +PUT my-index-000001/_doc/1 +{ + "my_field": "The cat sat on the [mat](sku3578)" +} + +GET my-index-000001/_search +{ + "query": { + "query_string": { + "query": "cats" + } + }, + "highlight": { + "fields": { + "my_field": { + "type": "annotated", <1> + "require_field_match": false + } + } + } +} +``` + +1. The `annotated` highlighter type is designed for use with annotated_text fields + + +The annotated highlighter is based on the `unified` highlighter and supports the same settings but does not use the `pre_tags` or `post_tags` parameters. Rather than using html-like markup such as `cat` the annotated highlighter uses the same markdown-like syntax used for annotations and injects a key=value annotation where `_hit_term` is the key and the matched search term is the value e.g. + +``` +The [cat](_hit_term=cat) sat on the [mat](sku3578) +``` +The annotated highlighter tries to be respectful of any existing markup in the original text: + +* If the search term matches exactly the location of an existing annotation then the `_hit_term` key is merged into the url-like syntax used in the `(...)` part of the existing annotation. +* However, if the search term overlaps the span of an existing annotation it would break the markup formatting so the original annotation is removed in favour of a new annotation with just the search hit information in the results. 
+* Any non-overlapping annotations in the original text are preserved in highlighter selections + diff --git a/docs/reference/elasticsearch-plugins/mapper-annotated-text-limitations.md b/docs/reference/elasticsearch-plugins/mapper-annotated-text-limitations.md new file mode 100644 index 0000000000000..58747f136c64e --- /dev/null +++ b/docs/reference/elasticsearch-plugins/mapper-annotated-text-limitations.md @@ -0,0 +1,12 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/mapper-annotated-text-limitations.html +--- + +# Limitations [mapper-annotated-text-limitations] + +The annotated_text field type supports the same mapping settings as the `text` field type but with the following exceptions: + +* No support for `fielddata` or `fielddata_frequency_filter` +* No support for `index_prefixes` or `index_phrases` indexing + diff --git a/docs/reference/elasticsearch-plugins/mapper-annotated-text-tips.md b/docs/reference/elasticsearch-plugins/mapper-annotated-text-tips.md new file mode 100644 index 0000000000000..05779014cb41c --- /dev/null +++ b/docs/reference/elasticsearch-plugins/mapper-annotated-text-tips.md @@ -0,0 +1,96 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/mapper-annotated-text-tips.html +--- + +# Data modelling tips [mapper-annotated-text-tips] + +## Use structured and unstructured fields [_use_structured_and_unstructured_fields] + +Annotations are normally a way of weaving structured information into unstructured text for higher-precision search. + +`Entity resolution` is a form of document enrichment undertaken by specialist software or people where references to entities in a document are disambiguated by attaching a canonical ID. The ID is used to resolve any number of aliases or distinguish between people with the same name. The hyperlinks connecting Wikipedia’s articles are a good example of resolved entity IDs woven into text. + +These IDs can be embedded as annotations in an annotated_text field but it often makes sense to include them in dedicated structured fields to support discovery via aggregations: + +```console +PUT my-index-000001 +{ + "mappings": { + "properties": { + "my_unstructured_text_field": { + "type": "annotated_text" + }, + "my_structured_people_field": { + "type": "text", + "fields": { + "keyword" : { + "type": "keyword" + } + } + } + } + } +} +``` + +Applications would then typically provide content and discover it as follows: + +```console +# Example documents +PUT my-index-000001/_doc/1 +{ + "my_unstructured_text_field": "[Shay](%40kimchy) created elasticsearch", + "my_twitter_handles": ["@kimchy"] <1> +} + +GET my-index-000001/_search +{ + "query": { + "query_string": { + "query": "elasticsearch OR logstash OR kibana",<2> + "default_field": "my_unstructured_text_field" + } + }, + "aggregations": { + "top_people" :{ + "significant_terms" : { <3> + "field" : "my_twitter_handles.keyword" + } + } + } +} +``` + +1. Note the `my_twitter_handles` contains a list of the annotation values also used in the unstructured text. (Note the annotated_text syntax requires escaping). By repeating the annotation values in a structured field this application has ensured that the tokens discovered in the structured field can be used for search and highlighting in the unstructured field. +2. In this example we search for documents that talk about components of the elastic stack +3. 
We use the `my_twitter_handles` field here to discover people who are significantly associated with the elastic stack. + + + +## Avoiding over-matching annotations [_avoiding_over_matching_annotations] + +By design, the regular text tokens and the annotation tokens co-exist in the same indexed field but in rare cases this can lead to some over-matching. + +The value of an annotation often denotes a *named entity* (a person, place or company). The tokens for these named entities are inserted untokenized, and differ from typical text tokens because they are normally: + +* Mixed case e.g. `Madonna` +* Multiple words e.g. `Jeff Beck` +* Can have punctuation or numbers e.g. `Apple Inc.` or `@kimchy` + +This means, for the most part, a search for a named entity in the annotated text field will not have any false positives e.g. when selecting `Apple Inc.` from an aggregation result you can drill down to highlight uses in the text without "over matching" on any text tokens like the word `apple` in this context: + +``` +the apple was very juicy +``` +However, a problem arises if your named entity happens to be a single term and lower-case e.g. the company `elastic`. In this case, a search on the annotated text field for the token `elastic` may match a text document such as this: + +``` +they fired an elastic band +``` +To avoid such false matches users should consider prefixing annotation values to ensure they don’t name clash with text tokens e.g. + +``` +[elastic](Company_elastic) released version 7.0 of the elastic stack today +``` + diff --git a/docs/reference/elasticsearch-plugins/mapper-annotated-text-usage.md b/docs/reference/elasticsearch-plugins/mapper-annotated-text-usage.md new file mode 100644 index 0000000000000..c67f5dd5280c0 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/mapper-annotated-text-usage.md @@ -0,0 +1,223 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/mapper-annotated-text-usage.html +--- + +# Using the annotated-text field [mapper-annotated-text-usage] + +The `annotated-text` tokenizes text content as per the more common [`text`](/reference/elasticsearch/mapping-reference/text.md) field (see "limitations" below) but also injects any marked-up annotation tokens directly into the search index: + +```console +PUT my-index-000001 +{ + "mappings": { + "properties": { + "my_field": { + "type": "annotated_text" + } + } + } +} +``` + +Such a mapping would allow marked-up text eg wikipedia articles to be indexed as both text and structured tokens. The annotations use a markdown-like syntax using URL encoding of one or more values separated by the `&` symbol. + +We can use the "_analyze" api to test how an example annotation would be stored as tokens in the search index: + +```js +GET my-index-000001/_analyze +{ + "field": "my_field", + "text":"Investors in [Apple](Apple+Inc.) rejoiced." +} +``` + +Response: + +```js +{ + "tokens": [ + { + "token": "investors", + "start_offset": 0, + "end_offset": 9, + "type": "", + "position": 0 + }, + { + "token": "in", + "start_offset": 10, + "end_offset": 12, + "type": "", + "position": 1 + }, + { + "token": "Apple Inc.", <1> + "start_offset": 13, + "end_offset": 18, + "type": "annotation", + "position": 2 + }, + { + "token": "apple", + "start_offset": 13, + "end_offset": 18, + "type": "", + "position": 2 + }, + { + "token": "rejoiced", + "start_offset": 19, + "end_offset": 27, + "type": "", + "position": 3 + } + ] +} +``` + +1. 
Note the whole annotation token `Apple Inc.` is placed, unchanged as a single token in the token stream and at the same position (position 2) as the text token (`apple`) it annotates. + + +We can now perform searches for annotations using regular `term` queries that don’t tokenize the provided search values. Annotations are a more precise way of matching as can be seen in this example where a search for `Beck` will not match `Jeff Beck` : + +```console +# Example documents +PUT my-index-000001/_doc/1 +{ + "my_field": "[Beck](Beck) announced a new tour"<1> +} + +PUT my-index-000001/_doc/2 +{ + "my_field": "[Jeff Beck](Jeff+Beck&Guitarist) plays a strat"<2> +} + +# Example search +GET my-index-000001/_search +{ + "query": { + "term": { + "my_field": "Beck" <3> + } + } +} +``` + +1. As well as tokenising the plain text into single words e.g. `beck`, here we inject the single token value `Beck` at the same position as `beck` in the token stream. +2. Note annotations can inject multiple tokens at the same position - here we inject both the very specific value `Jeff Beck` and the broader term `Guitarist`. This enables broader positional queries e.g. finding mentions of a `Guitarist` near to `strat`. +3. A benefit of searching with these carefully defined annotation tokens is that a query for `Beck` will not match document 2 that contains the tokens `jeff`, `beck` and `Jeff Beck` + + +::::{warning} +Any use of `=` signs in annotation values eg `[Prince](person=Prince)` will cause the document to be rejected with a parse failure. In future we hope to have a use for the equals signs so will actively reject documents that contain this today. +:::: + + +## Synthetic `_source` [annotated-text-synthetic-source] + +::::{important} +Synthetic `_source` is Generally Available only for TSDB indices (indices that have `index.mode` set to `time_series`). For other indices synthetic `_source` is in technical preview. Features in technical preview may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features. +:::: + + +If using a sub-`keyword` field then the values are sorted in the same way as a `keyword` field’s values are sorted. By default, that means sorted with duplicates removed. So: + +$$$synthetic-source-text-example-default$$$ + +```console +PUT idx +{ + "settings": { + "index": { + "mapping": { + "source": { + "mode": "synthetic" + } + } + } + }, + "mappings": { + "properties": { + "text": { + "type": "annotated_text", + "fields": { + "raw": { + "type": "keyword" + } + } + } + } + } +} +PUT idx/_doc/1 +{ + "text": [ + "the quick brown fox", + "the quick brown fox", + "jumped over the lazy dog" + ] +} +``` + +Will become: + +```console-result +{ + "text": [ + "jumped over the lazy dog", + "the quick brown fox" + ] +} +``` + +::::{note} +Reordering text fields can have an effect on [phrase](/reference/query-languages/query-dsl-match-query-phrase.md) and [span](/reference/query-languages/span-queries.md) queries. See the discussion about [`position_increment_gap`](/reference/elasticsearch/mapping-reference/position-increment-gap.md) for more detail. You can avoid this by making sure the `slop` parameter on the phrase queries is lower than the `position_increment_gap`. This is the default. +:::: + + +If the `annotated_text` field sets `store` to true then order and duplicates are preserved. 
+ +$$$synthetic-source-text-example-stored$$$ + +```console +PUT idx +{ + "settings": { + "index": { + "mapping": { + "source": { + "mode": "synthetic" + } + } + } + }, + "mappings": { + "properties": { + "text": { "type": "annotated_text", "store": true } + } + } +} +PUT idx/_doc/1 +{ + "text": [ + "the quick brown fox", + "the quick brown fox", + "jumped over the lazy dog" + ] +} +``` + +Will become: + +```console-result +{ + "text": [ + "the quick brown fox", + "the quick brown fox", + "jumped over the lazy dog" + ] +} +``` + + diff --git a/docs/reference/elasticsearch-plugins/mapper-annotated-text.md b/docs/reference/elasticsearch-plugins/mapper-annotated-text.md new file mode 100644 index 0000000000000..24a510faa7a0a --- /dev/null +++ b/docs/reference/elasticsearch-plugins/mapper-annotated-text.md @@ -0,0 +1,49 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/mapper-annotated-text.html +--- + +# Mapper annotated text plugin [mapper-annotated-text] + +::::{warning} +This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features. +:::: + + +The mapper-annotated-text plugin provides the ability to index text that is a combination of free-text and special markup that is typically used to identify items of interest such as people or organisations (see NER or Named Entity Recognition tools). + +The elasticsearch markup allows one or more additional tokens to be injected, unchanged, into the token stream at the same position as the underlying text it annotates. + + +## Installation [mapper-annotated-text-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install mapper-annotated-text +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/mapper-annotated-text/mapper-annotated-text-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/mapper-annotated-text/mapper-annotated-text-9.0.0-beta1.zip). To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/mapper-annotated-text/mapper-annotated-text-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/mapper-annotated-text/mapper-annotated-text-9.0.0-beta1.zip.asc). + + +## Removal [mapper-annotated-text-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove mapper-annotated-text +``` + +The node must be stopped before removing the plugin. 
+ + + + + diff --git a/docs/reference/elasticsearch-plugins/mapper-murmur3-usage.md b/docs/reference/elasticsearch-plugins/mapper-murmur3-usage.md new file mode 100644 index 0000000000000..03bfb3f6a5183 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/mapper-murmur3-usage.md @@ -0,0 +1,58 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/mapper-murmur3-usage.html +--- + +# Using the murmur3 field [mapper-murmur3-usage] + +The `murmur3` is typically used within a multi-field, so that both the original value and its hash are stored in the index: + +```console +PUT my-index-000001 +{ + "mappings": { + "properties": { + "my_field": { + "type": "keyword", + "fields": { + "hash": { + "type": "murmur3" + } + } + } + } + } +} +``` + +Such a mapping would allow to refer to `my_field.hash` in order to get hashes of the values of the `my_field` field. This is only useful in order to run `cardinality` aggregations: + +```console +# Example documents +PUT my-index-000001/_doc/1 +{ + "my_field": "This is a document" +} + +PUT my-index-000001/_doc/2 +{ + "my_field": "This is another document" +} + +GET my-index-000001/_search +{ + "aggs": { + "my_field_cardinality": { + "cardinality": { + "field": "my_field.hash" <1> + } + } + } +} +``` + +1. Counting unique values on the `my_field.hash` field + + +Running a `cardinality` aggregation on the `my_field` field directly would yield the same result, however using `my_field.hash` instead might result in a speed-up if the field has a high-cardinality. On the other hand, it is discouraged to use the `murmur3` field on numeric fields and string fields that are not almost unique as the use of a `murmur3` field is unlikely to bring significant speed-ups, while increasing the amount of disk space required to store the index. + diff --git a/docs/reference/elasticsearch-plugins/mapper-murmur3.md b/docs/reference/elasticsearch-plugins/mapper-murmur3.md new file mode 100644 index 0000000000000..ffb3162626888 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/mapper-murmur3.md @@ -0,0 +1,39 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/mapper-murmur3.html +--- + +# Mapper murmur3 plugin [mapper-murmur3] + +The mapper-murmur3 plugin provides the ability to compute hash of field values at index-time and store them in the index. This can sometimes be helpful when running cardinality aggregations on high-cardinality and large string fields. + + +## Installation [mapper-murmur3-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install mapper-murmur3 +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/mapper-murmur3/mapper-murmur3-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/mapper-murmur3/mapper-murmur3-9.0.0-beta1.zip). To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/mapper-murmur3/mapper-murmur3-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/mapper-murmur3/mapper-murmur3-9.0.0-beta1.zip.asc). 
+ + +## Removal [mapper-murmur3-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove mapper-murmur3 +``` + +The node must be stopped before removing the plugin. + + diff --git a/docs/reference/elasticsearch-plugins/mapper-plugins.md b/docs/reference/elasticsearch-plugins/mapper-plugins.md new file mode 100644 index 0000000000000..b273a007781df --- /dev/null +++ b/docs/reference/elasticsearch-plugins/mapper-plugins.md @@ -0,0 +1,26 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/mapper.html +--- + +# Mapper plugins [mapper] + +Mapper plugins allow new field data types to be added to Elasticsearch. + + +## Core mapper plugins [_core_mapper_plugins] + +The core mapper plugins are: + +[Mapper size plugin](/reference/elasticsearch-plugins/mapper-size.md) +: The mapper-size plugin provides the `_size` metadata field which, when enabled, indexes the size in bytes of the original [`_source`](/reference/elasticsearch/mapping-reference/mapping-source-field.md) field. + +[Mapper murmur3 plugin](/reference/elasticsearch-plugins/mapper-murmur3.md) +: The mapper-murmur3 plugin allows hashes to be computed at index-time and stored in the index for later use with the `cardinality` aggregation. + +[Mapper annotated text plugin](/reference/elasticsearch-plugins/mapper-annotated-text.md) +: The annotated text plugin provides the ability to index text that is a combination of free-text and special markup that is typically used to identify items of interest such as people or organisations (see NER or Named Entity Recognition tools). + + + + diff --git a/docs/reference/elasticsearch-plugins/mapper-size-usage.md b/docs/reference/elasticsearch-plugins/mapper-size-usage.md new file mode 100644 index 0000000000000..98ab81350b51c --- /dev/null +++ b/docs/reference/elasticsearch-plugins/mapper-size-usage.md @@ -0,0 +1,82 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/mapper-size-usage.html +--- + +# Using the _size field [mapper-size-usage] + +In order to enable the `_size` field, set the mapping as follows: + +```console +PUT my-index-000001 +{ + "mappings": { + "_size": { + "enabled": true + } + } +} +``` + +The value of the `_size` field is accessible in queries, aggregations, scripts, and when sorting. It can be retrieved using the [fields parameter](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#search-fields-param): + +```console +# Example documents +PUT my-index-000001/_doc/1 +{ + "text": "This is a document" +} + +PUT my-index-000001/_doc/2 +{ + "text": "This is another document" +} + +GET my-index-000001/_search +{ + "query": { + "range": { + "_size": { <1> + "gt": 10 + } + } + }, + "aggs": { + "sizes": { + "terms": { + "field": "_size", <2> + "size": 10 + } + } + }, + "sort": [ + { + "_size": { <3> + "order": "desc" + } + } + ], + "fields": ["_size"], <4> + "script_fields": { + "size": { + "script": "doc['_size']" <5> + } + } +} +``` + +1. Querying on the `_size` field +2. Aggregating on the `_size` field +3. Sorting on the `_size` field +4. Use the `fields` parameter to return the `_size` in the search response. +5. Uses a [script field](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#script-fields) to return the `_size` field in the search response. + + +::::{admonition} Using `_size` in {kib} +:class: note + +To use the `_size` field in {{kib}}, update the `metaFields` setting and add `_size` to the list of meta fields. 
`metaFields` can be configured in {{kib}} from the Advanced Settings page in Management. + +:::: + + diff --git a/docs/reference/elasticsearch-plugins/mapper-size.md b/docs/reference/elasticsearch-plugins/mapper-size.md new file mode 100644 index 0000000000000..5f0414ef69026 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/mapper-size.md @@ -0,0 +1,39 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/mapper-size.html +--- + +# Mapper size plugin [mapper-size] + +The mapper-size plugin provides the `_size` metadata field which, when enabled, indexes the size in bytes of the original [`_source`](/reference/elasticsearch/mapping-reference/mapping-source-field.md) field. + + +## Installation [mapper-size-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install mapper-size +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/mapper-size/mapper-size-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/mapper-size/mapper-size-9.0.0-beta1.zip). To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/mapper-size/mapper-size-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/mapper-size/mapper-size-9.0.0-beta1.zip.asc). + + +## Removal [mapper-size-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove mapper-size +``` + +The node must be stopped before removing the plugin. + + diff --git a/docs/reference/elasticsearch-plugins/plugin-management-custom-url.md b/docs/reference/elasticsearch-plugins/plugin-management-custom-url.md new file mode 100644 index 0000000000000..e7c2f790fc41a --- /dev/null +++ b/docs/reference/elasticsearch-plugins/plugin-management-custom-url.md @@ -0,0 +1,55 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/plugin-management-custom-url.html +--- + +# Custom URL or file system [plugin-management-custom-url] + +A plugin can also be downloaded directly from a custom location by specifying the URL: + +```shell +sudo bin/elasticsearch-plugin install [url] <1> +``` + +1. must be a valid URL, the plugin name is determined from its descriptor. + + +Unix +: To install a plugin from your local file system at `/path/to/plugin.zip`, you could run: + + ```shell + sudo bin/elasticsearch-plugin install file:///path/to/plugin.zip + ``` + + +Windows +: To install a plugin from your local file system at `C:\path\to\plugin.zip`, you could run: + + ```shell + bin\elasticsearch-plugin install file:///C:/path/to/plugin.zip + ``` + + ::::{note} + Any path that contains spaces must be wrapped in quotes! + :::: + + + ::::{note} + If you are installing a plugin from the filesystem the plugin distribution must not be contained in the `plugins` directory for the node that you are installing the plugin to or installation will fail. 
+ :::: + + +HTTP +: To install a plugin from an HTTP URL: + + ```shell + sudo bin/elasticsearch-plugin install https://some.domain/path/to/plugin.zip + ``` + + The plugin script will refuse to talk to an HTTPS URL with an untrusted certificate. To use a self-signed HTTPS cert, you will need to add the CA cert to a local Java truststore and pass the location to the script as follows: + + ```shell + sudo CLI_JAVA_OPTS="-Djavax.net.ssl.trustStore=/path/to/trustStore.jks" bin/elasticsearch-plugin install https://host/plugin.zip + ``` + + diff --git a/docs/reference/elasticsearch-plugins/plugin-management.md b/docs/reference/elasticsearch-plugins/plugin-management.md new file mode 100644 index 0000000000000..111b9b6fd107e --- /dev/null +++ b/docs/reference/elasticsearch-plugins/plugin-management.md @@ -0,0 +1,11 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/plugin-management.html + - https://www.elastic.co/guide/en/cloud/current/ec-adding-plugins.html + - https://www.elastic.co/guide/en/cloud-enterprise/current/ece-add-plugins.html +--- + +# Plugin management + +% The inventory is not clear about which of the mapped pages should be source material +% for this page vs. added as separate pages. diff --git a/docs/reference/elasticsearch-plugins/repository-hdfs-config.md b/docs/reference/elasticsearch-plugins/repository-hdfs-config.md new file mode 100644 index 0000000000000..790feb2820f3a --- /dev/null +++ b/docs/reference/elasticsearch-plugins/repository-hdfs-config.md @@ -0,0 +1,61 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/repository-hdfs-config.html +--- + +# Configuration properties [repository-hdfs-config] + +Once installed, define the configuration for the `hdfs` repository through the [REST API](docs-content://deploy-manage/tools/snapshot-and-restore.md): + +```console +PUT _snapshot/my_hdfs_repository +{ + "type": "hdfs", + "settings": { + "uri": "hdfs://namenode:8020/", + "path": "elasticsearch/repositories/my_hdfs_repository", + "conf.dfs.client.read.shortcircuit": "true" + } +} +``` + +The following settings are supported: + +`uri` +: The uri address for hdfs. ex: "hdfs://:/". (Required) + +`path` +: The file path within the filesystem where data is stored/loaded. ex: "path/to/file". (Required) + +`load_defaults` +: Whether to load the default Hadoop configuration or not. (Enabled by default) + +`conf.` +: Inlined configuration parameter to be added to Hadoop configuration. (Optional) Only client oriented properties from the hadoop [core](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml) and [hdfs](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml) configuration files will be recognized by the plugin. + +`compress` +: Whether to compress the metadata or not. (Enabled by default) + +`max_restore_bytes_per_sec` +: Throttles per node restore rate. Defaults to unlimited. Note that restores are also throttled through [recovery settings](/reference/elasticsearch/configuration-reference/index-recovery-settings.md). + +`max_snapshot_bytes_per_sec` +: Throttles per node snapshot rate. Defaults to `40mb` per second. Note that if the [recovery settings for managed services](/reference/elasticsearch/configuration-reference/index-recovery-settings.md) are set, then it defaults to unlimited, and the rate is additionally throttled through [recovery settings](/reference/elasticsearch/configuration-reference/index-recovery-settings.md). 
+ +`readonly` +: Makes repository read-only. Defaults to `false`. + +`chunk_size` +: Override the chunk size. (Disabled by default) + +`security.principal` +: Kerberos principal to use when connecting to a secured HDFS cluster. If you are using a service principal for your elasticsearch node, you may use the `_HOST` pattern in the principal name and the plugin will replace the pattern with the hostname of the node at runtime (see [Creating the Secure Repository](/reference/elasticsearch-plugins/repository-hdfs-security.md#repository-hdfs-security-runtime)). + +`replication_factor` +: The replication factor for all new HDFS files created by this repository. Must be greater or equal to `dfs.replication.min` and less or equal to `dfs.replication.max` HDFS option. Defaults to using HDFS cluster setting. + + +## A note on HDFS availability [repository-hdfs-availability] + +When you initialize a repository, its settings are persisted in the cluster state. When a node comes online, it will attempt to initialize all repositories for which it has settings. If your cluster has an HDFS repository configured, then all nodes in the cluster must be able to reach HDFS when starting. If not, then the node will fail to initialize the repository at start up and the repository will be unusable. If this happens, you will need to remove and re-add the repository or restart the offending node. + diff --git a/docs/reference/elasticsearch-plugins/repository-hdfs-security.md b/docs/reference/elasticsearch-plugins/repository-hdfs-security.md new file mode 100644 index 0000000000000..f7b24e79ad95a --- /dev/null +++ b/docs/reference/elasticsearch-plugins/repository-hdfs-security.md @@ -0,0 +1,82 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/repository-hdfs-security.html +--- + +# Hadoop security [repository-hdfs-security] + +The HDFS repository plugin integrates seamlessly with Hadoop’s authentication model. The following authentication methods are supported by the plugin: + +`simple` +: Also means "no security" and is enabled by default. Uses information from underlying operating system account running Elasticsearch to inform Hadoop of the name of the current user. Hadoop makes no attempts to verify this information. + +`kerberos` +: Authenticates to Hadoop through the usage of a Kerberos principal and keytab. Interfacing with HDFS clusters secured with Kerberos requires a few additional steps to enable (See [Principals and keytabs](#repository-hdfs-security-keytabs) and [Creating the secure repository](#repository-hdfs-security-runtime) for more info) + + +## Principals and keytabs [repository-hdfs-security-keytabs] + +Before attempting to connect to a secured HDFS cluster, provision the Kerberos principals and keytabs that the Elasticsearch nodes will use for authenticating to Kerberos. For maximum security and to avoid tripping up the Kerberos replay protection, you should create a service principal per node, following the pattern of `elasticsearch/hostname@REALM`. + +::::{warning} +In some cases, if the same principal is authenticating from multiple clients at once, services may reject authentication for those principals under the assumption that they could be replay attacks. If you are running the plugin in production with multiple nodes you should be using a unique service principal for each node. 
+:::: + + +On each Elasticsearch node, place the appropriate keytab file in the node’s configuration location under the `repository-hdfs` directory using the name `krb5.keytab`: + +```bash +$> cd elasticsearch/config +$> ls +elasticsearch.yml jvm.options log4j2.properties repository-hdfs/ scripts/ +$> cd repository-hdfs +$> ls +krb5.keytab +``` + +::::{note} +Make sure you have the correct keytabs! If you are using a service principal per node (like `elasticsearch/hostname@REALM`) then each node will need its own unique keytab file for the principal assigned to that host! +:::: + + + +## Creating the secure repository [repository-hdfs-security-runtime] + +Once your keytab files are in place and your cluster is started, creating a secured HDFS repository is simple. Just add the name of the principal that you will be authenticating as in the repository settings under the `security.principal` option: + +```console +PUT _snapshot/my_hdfs_repository +{ + "type": "hdfs", + "settings": { + "uri": "hdfs://namenode:8020/", + "path": "/user/elasticsearch/repositories/my_hdfs_repository", + "security.principal": "elasticsearch@REALM" + } +} +``` + +If you are using different service principals for each node, you can use the `_HOST` pattern in your principal name. Elasticsearch will automatically replace the pattern with the hostname of the node at runtime: + +```console +PUT _snapshot/my_hdfs_repository +{ + "type": "hdfs", + "settings": { + "uri": "hdfs://namenode:8020/", + "path": "/user/elasticsearch/repositories/my_hdfs_repository", + "security.principal": "elasticsearch/_HOST@REALM" + } +} +``` + + +## Authorization [repository-hdfs-security-authorization] + +Once Elasticsearch is connected and authenticated to HDFS, HDFS will infer a username to use for authorizing file access for the client. By default, it picks this username from the primary part of the kerberos principal used to authenticate to the service. For example, in the case of a principal like `elasticsearch@REALM` or `elasticsearch/hostname@REALM` then the username that HDFS extracts for file access checks will be `elasticsearch`. + +::::{note} +The repository plugin makes no assumptions of what Elasticsearch’s principal name is. The main fragment of the Kerberos principal is not required to be `elasticsearch`. If you have a principal or service name that works better for you or your organization then feel free to use it instead! +:::: + + diff --git a/docs/reference/elasticsearch-plugins/repository-hdfs-usage.md b/docs/reference/elasticsearch-plugins/repository-hdfs-usage.md new file mode 100644 index 0000000000000..21aafb7294f07 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/repository-hdfs-usage.md @@ -0,0 +1,14 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/repository-hdfs-usage.html +--- + +# Getting started with HDFS [repository-hdfs-usage] + +The HDFS snapshot/restore plugin is built against the latest Apache Hadoop 2.x (currently 2.7.1). If the distro you are using is not protocol compatible with Apache Hadoop, consider replacing the Hadoop libraries inside the plugin folder with your own (you might have to adjust the security permissions required). + +Even if Hadoop is already installed on the Elasticsearch nodes, for security reasons, the required libraries need to be placed under the plugin folder. Note that in most cases, if the distro is compatible, one simply needs to configure the repository with the appropriate Hadoop configuration files (see below). 
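Once the plugin is installed on every node, registering a repository is a single API call. The sketch below mirrors the minimal registration shown in [Configuration properties](/reference/elasticsearch-plugins/repository-hdfs-config.md); the namenode address and path are placeholders for your own environment:

```console
PUT _snapshot/my_hdfs_repository
{
  "type": "hdfs",
  "settings": {
    "uri": "hdfs://namenode:8020/",
    "path": "elasticsearch/repositories/my_hdfs_repository"
  }
}
```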
+ +Windows Users +: Using Apache Hadoop on Windows is problematic and thus it is not recommended. For those *really* wanting to use it, make sure you place the elusive `winutils.exe` under the plugin folder and point `HADOOP_HOME` variable to it; this should minimize the amount of permissions Hadoop requires (though one would still have to add some more). + diff --git a/docs/reference/elasticsearch-plugins/repository-hdfs.md b/docs/reference/elasticsearch-plugins/repository-hdfs.md new file mode 100644 index 0000000000000..1b5eded50226f --- /dev/null +++ b/docs/reference/elasticsearch-plugins/repository-hdfs.md @@ -0,0 +1,41 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/repository-hdfs.html +--- + +# Hadoop HDFS repository plugin [repository-hdfs] + +The HDFS repository plugin adds support for using HDFS File System as a repository for [Snapshot/Restore](docs-content://deploy-manage/tools/snapshot-and-restore.md). + + +## Installation [repository-hdfs-install] + +::::{warning} +Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available. +:::: + + +This plugin can be installed using the plugin manager: + +```sh +sudo bin/elasticsearch-plugin install repository-hdfs +``` + +The plugin must be installed on every node in the cluster, and each node must be restarted after installation. + +You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/repository-hdfs/repository-hdfs-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/repository-hdfs/repository-hdfs-9.0.0-beta1.zip). To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/repository-hdfs/repository-hdfs-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/repository-hdfs/repository-hdfs-9.0.0-beta1.zip.asc). + + +## Removal [repository-hdfs-remove] + +The plugin can be removed with the following command: + +```sh +sudo bin/elasticsearch-plugin remove repository-hdfs +``` + +The node must be stopped before removing the plugin. + + + + diff --git a/docs/reference/elasticsearch-plugins/snapshotrestore-repository-plugins.md b/docs/reference/elasticsearch-plugins/snapshotrestore-repository-plugins.md new file mode 100644 index 0000000000000..60824ba219fb5 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/snapshotrestore-repository-plugins.md @@ -0,0 +1,30 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/plugins/current/repository.html +--- + +# Snapshot/restore repository plugins [repository] + +Repository plugins extend the [Snapshot/Restore](docs-content://deploy-manage/tools/snapshot-and-restore.md) functionality in Elasticsearch by adding repositories backed by the cloud or by distributed file systems: + + +### Official repository plugins [_official_repository_plugins] + +::::{note} +Support for S3, GCS and Azure repositories is now bundled in {{es}} by default. +:::: + + +The official repository plugins are: + +[HDFS Repository](/reference/elasticsearch-plugins/repository-hdfs.md) +: The Hadoop HDFS Repository plugin adds support for using HDFS as a repository. 

## Community contributed repository plugins [_community_contributed_repository_plugins]

The following plugin has been contributed by our community:

* [Openstack Swift](https://github.com/BigDataBoutique/elasticsearch-repository-swift) (by Wikimedia Foundation and BigData Boutique)


diff --git a/docs/reference/elasticsearch-plugins/store-plugins.md b/docs/reference/elasticsearch-plugins/store-plugins.md new file mode 100644 index 0000000000000..8c9add1ea74e9 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/store-plugins.md @@ -0,0 +1,18 @@
---
mapped_pages:
  - https://www.elastic.co/guide/en/elasticsearch/plugins/current/store.html
---

# Store plugins [store]

Store plugins offer alternatives to the default Lucene stores.


## Core store plugins [_core_store_plugins]

The core store plugins are:

[Store SMB](/reference/elasticsearch-plugins/store-smb.md)
: The Store SMB plugin works around a bug in Windows SMB and Java on Windows.


diff --git a/docs/reference/elasticsearch-plugins/store-smb-usage.md b/docs/reference/elasticsearch-plugins/store-smb-usage.md new file mode 100644 index 0000000000000..da892eba0f6d0 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/store-smb-usage.md @@ -0,0 +1,41 @@
---
mapped_pages:
  - https://www.elastic.co/guide/en/elasticsearch/plugins/current/store-smb-usage.html
---

# Working around a bug in Windows SMB and Java on Windows [store-smb-usage]

When using a shared file system based on the SMB protocol (such as Azure File Service) to store indices, Lucene opens index segment files with a write-only flag. This is the *correct* way to open the files, as they are only used for writes, and it allows different file system implementations to optimize for it. Unfortunately, on Windows with SMB, this disables the cache manager, causing writes to be slow. The issue is described in [LUCENE-6176](https://issues.apache.org/jira/browse/LUCENE-6176) and affects every Java program, not just Elasticsearch. It needs to be fixed outside of Elasticsearch and Lucene, either in Windows or in OpenJDK. For now, the plugin provides experimental support for opening the files with a read flag; this should be considered experimental, and the proper fix belongs in OpenJDK or Windows.

The Store SMB plugin provides the following storage types optimized for SMB:

`smb_mmap_fs`
: An SMB-specific implementation of the default [mmap fs](/reference/elasticsearch/index-settings/store.md#mmapfs)

`smb_simple_fs`
: deprecated::[7.15,"smb_simple_fs is deprecated and will be removed in 8.0. Use smb_nio_fs or other file systems instead."]

`smb_nio_fs`
: An SMB-specific implementation of the default [nio fs](/reference/elasticsearch/index-settings/store.md#niofs)

To use one of these storage types, install the Store SMB plugin and restart the node, then configure Elasticsearch to use the storage type you want.

This can be configured for all indices by adding the following to the `elasticsearch.yml` file:

```yaml
index.store.type: smb_nio_fs
```

Note that the setting applies only to newly created indices.

It can also be set on a per-index basis at index creation time:

```console
PUT my-index-000001
{
  "settings": {
    "index.store.type": "smb_mmap_fs"
  }
}
```

diff --git a/docs/reference/elasticsearch-plugins/store-smb.md b/docs/reference/elasticsearch-plugins/store-smb.md new file mode 100644 index 0000000000000..f3f35a2dbc5c4 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/store-smb.md @@ -0,0 +1,39 @@
---
mapped_pages:
  - https://www.elastic.co/guide/en/elasticsearch/plugins/current/store-smb.html
---

# Store SMB plugin [store-smb]

The Store SMB plugin works around a bug in Windows SMB and Java on Windows.


## Installation [store-smb-install]

::::{warning}
Version 9.0.0-beta1 of the Elastic Stack has not yet been released. The plugin might not be available.
::::


This plugin can be installed using the plugin manager:

```sh
sudo bin/elasticsearch-plugin install store-smb
```

The plugin must be installed on every node in the cluster, and each node must be restarted after installation.

You can download this plugin for [offline install](/reference/elasticsearch-plugins/plugin-management-custom-url.md) from [https://artifacts.elastic.co/downloads/elasticsearch-plugins/store-smb/store-smb-9.0.0-beta1.zip](https://artifacts.elastic.co/downloads/elasticsearch-plugins/store-smb/store-smb-9.0.0-beta1.zip). To verify the `.zip` file, use the [SHA hash](https://artifacts.elastic.co/downloads/elasticsearch-plugins/store-smb/store-smb-9.0.0-beta1.zip.sha512) or [ASC key](https://artifacts.elastic.co/downloads/elasticsearch-plugins/store-smb/store-smb-9.0.0-beta1.zip.asc).


## Removal [store-smb-remove]

The plugin can be removed with the following command:

```sh
sudo bin/elasticsearch-plugin remove store-smb
```

The node must be stopped before removing the plugin.
+ + diff --git a/docs/reference/elasticsearch-plugins/toc.yml b/docs/reference/elasticsearch-plugins/toc.yml new file mode 100644 index 0000000000000..9a74f188fbdd1 --- /dev/null +++ b/docs/reference/elasticsearch-plugins/toc.yml @@ -0,0 +1,113 @@ +toc: + - file: index.md + - file: plugin-management.md + children: + - file: installation.md + - file: plugin-management-custom-url.md + - file: installing-multiple-plugins.md + - file: mandatory-plugins.md + - file: listing-removing-updating.md + - file: _other_command_line_parameters.md + - file: _plugins_directory.md + - file: manage-plugins-using-configuration-file.md + - file: cloud/ec-adding-plugins.md + children: + - file: cloud/ec-adding-elastic-plugins.md + - file: cloud/ec-custom-bundles.md + - file: cloud/ec-plugins-guide.md + - file: cloud-enterprise/ece-add-plugins.md + - file: api-extension-plugins.md + - file: analysis-plugins.md + children: + - file: analysis-icu.md + children: + - file: analysis-icu-analyzer.md + - file: analysis-icu-normalization-charfilter.md + - file: analysis-icu-tokenizer.md + - file: analysis-icu-normalization.md + - file: analysis-icu-folding.md + - file: analysis-icu-collation.md + - file: analysis-icu-collation-keyword-field.md + - file: analysis-icu-transform.md + - file: analysis-kuromoji.md + children: + - file: analysis-kuromoji-analyzer.md + - file: analysis-kuromoji-charfilter.md + - file: analysis-kuromoji-tokenizer.md + - file: analysis-kuromoji-baseform.md + - file: analysis-kuromoji-speech.md + - file: analysis-kuromoji-readingform.md + - file: analysis-kuromoji-stemmer.md + - file: analysis-kuromoji-stop.md + - file: analysis-kuromoji-number.md + - file: analysis-kuromoji-hiragana-uppercase.md + - file: analysis-kuromoji-katakana-uppercase.md + - file: analysis-kuromoji-completion.md + - file: analysis-nori.md + children: + - file: analysis-nori-analyzer.md + - file: analysis-nori-tokenizer.md + - file: analysis-nori-speech.md + - file: analysis-nori-readingform.md + - file: analysis-nori-number.md + - file: analysis-phonetic.md + children: + - file: analysis-phonetic-token-filter.md + - file: analysis-smartcn.md + children: + - file: _reimplementing_and_extending_the_analyzers.md + - file: analysis-smartcn_stop.md + - file: analysis-stempel.md + children: + - file: _reimplementing_and_extending_the_analyzers_2.md + - file: analysis-polish-stop.md + - file: analysis-ukrainian.md + - file: discovery-plugins.md + children: + - file: discovery-ec2.md + children: + - file: discovery-ec2-usage.md + - file: cloud-aws-best-practices.md + - file: discovery-azure-classic.md + children: + - file: discovery-azure-classic-usage.md + - file: discovery-azure-classic-long.md + - file: discovery-azure-classic-scale.md + - file: discovery-gce.md + children: + - file: discovery-gce-usage.md + - file: discovery-gce-network-host.md + - file: discovery-gce-usage-long.md + - file: discovery-gce-usage-cloning.md + - file: discovery-gce-usage-zones.md + - file: discovery-gce-usage-tags.md + - file: discovery-gce-usage-port.md + - file: discovery-gce-usage-tips.md + - file: discovery-gce-usage-testing.md + - file: mapper-plugins.md + children: + - file: mapper-size.md + children: + - file: mapper-size-usage.md + - file: mapper-murmur3.md + children: + - file: mapper-murmur3-usage.md + - file: mapper-annotated-text.md + children: + - file: mapper-annotated-text-usage.md + - file: mapper-annotated-text-tips.md + - file: mapper-annotated-text-highlighter.md + - file: mapper-annotated-text-limitations.md + - file: 
snapshotrestore-repository-plugins.md + children: + - file: repository-hdfs.md + children: + - file: repository-hdfs-usage.md + - file: repository-hdfs-config.md + - file: repository-hdfs-security.md + - file: store-plugins.md + children: + - file: store-smb.md + children: + - file: store-smb-usage.md + - file: integrations.md \ No newline at end of file diff --git a/docs/reference/elasticsearch/command-line-tools/certgen.md b/docs/reference/elasticsearch/command-line-tools/certgen.md new file mode 100644 index 0000000000000..d2b55ef25ce6e --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/certgen.md @@ -0,0 +1,128 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/certgen.html +--- + +# elasticsearch-certgen [certgen] + +::::{admonition} Deprecated in 6.1. +:class: warning + +Replaced by [`elasticsearch-certutil`](/reference/elasticsearch/command-line-tools/certutil.md). +:::: + + +The `elasticsearch-certgen` command simplifies the creation of certificate authorities (CA), certificate signing requests (CSR), and signed certificates for use with the Elastic Stack. Though this command is deprecated, you do not need to replace CAs, CSRs, or certificates that it created. + + +## Synopsis [_synopsis] + +```shell +bin/elasticsearch-certgen +(([--cert ] [--days ] [--dn ] [--key ] +[--keysize ] [--pass ] [--p12 ]) +| [--csr]) +[-E ] [-h, --help] [--in ] [--out ] +([-s, --silent] | [-v, --verbose]) +``` + + +## Description [_description_8] + +By default, the command runs in interactive mode and you are prompted for information about each instance. An instance is any piece of the Elastic Stack that requires a Transport Layer Security (TLS) or SSL certificate. Depending on your configuration, {{es}}, Logstash, {{kib}}, and Beats might all require a certificate and private key. + +The minimum required value for each instance is a name. This can simply be the hostname, which is used as the Common Name of the certificate. You can also use a full distinguished name. IP addresses and DNS names are optional. Multiple values can be specified as a comma separated string. If no IP addresses or DNS names are provided, you might disable hostname verification in your TLS or SSL configuration. + +Depending on the parameters that you specify, you are also prompted for necessary information such as the path for the output file and the CA private key password. + +The `elasticsearch-certgen` command also supports a silent mode of operation to enable easier batch operations. For more information, see [Using `elasticsearch-certgen` in Silent Mode](#certgen-silent). + +The output file is a zip file that contains the signed certificates and private keys for each instance. If you chose to generate a CA, which is the default behavior, the certificate and private key are included in the output file. If you chose to generate CSRs, you should provide them to your commercial or organization-specific certificate authority to obtain signed certificates. The signed certificates must be in PEM format to work with the {{stack}} {{security-features}}. + + +## Parameters [certgen-parameters] + +`--cert ` +: Specifies to generate new instance certificates and keys using an existing CA certificate, which is provided in the `` argument. This parameter cannot be used with the `-csr` parameter. + +`--csr` +: Specifies to operate in certificate signing request mode. + +`--days ` +: Specifies an integer value that represents the number of days the generated keys are valid. 
The default value is `1095`. This parameter cannot be used with the `-csr` parameter. + +`--dn ` +: Defines the *Distinguished Name* that is used for the generated CA certificate. The default value is `CN=Elastic Certificate Tool Autogenerated CA`. This parameter cannot be used with the `-csr` parameter. + +`-E ` +: Configures a setting. + +`-h, --help` +: Returns all of the command parameters. + +`--in ` +: Specifies the file that is used to run in silent mode. The input file must be a YAML file, as described in [Using `elasticsearch-certgen` in Silent Mode](#certgen-silent). + +`--key ` +: Specifies the *private-key* file for the CA certificate. This parameter is required whenever the `-cert` parameter is used. + +`--keysize ` +: Defines the number of bits that are used in generated RSA keys. The default value is `2048`. + +`--out ` +: Specifies a path for the output file. + +`--pass ` +: Specifies the password for the CA private key. If the `-key` parameter is provided, then this is the password for the existing private key file. Otherwise, it is the password that should be applied to the generated CA key. This parameter cannot be used with the `-csr` parameter. + +`--p12 ` +: Generate a PKCS#12 (`.p12` or `.pfx`) container file for each of the instance certificates and keys. The generated file is protected by the supplied password, which can be blank. This parameter cannot be used with the `-csr` parameter. + +`-s, --silent` +: Shows minimal output. + +`-v, --verbose` +: Shows verbose output. + + +## Examples [_examples_14] + + +### Using `elasticsearch-certgen` in Silent Mode [certgen-silent] + +To use the silent mode of operation, you must create a YAML file that contains information about the instances. It must match the following format: + +```yaml +instances: + - name: "node1" <1> + ip: <2> + - "192.0.2.1" + dns: <3> + - "node1.mydomain.com" + - name: "node2" + ip: + - "192.0.2.2" + - "198.51.100.1" + - name: "node3" + - name: "node4" + dns: + - "node4.mydomain.com" + - "node4.internal" + - name: "CN=node5,OU=IT,DC=mydomain,DC=com" + filename: "node5" <4> +``` + +1. The name of the instance. This can be a simple string value or can be a Distinguished Name (DN). This is the only required field. +2. An optional array of strings that represent IP Addresses for this instance. Both IPv4 and IPv6 values are allowed. The values are added as Subject Alternative Names. +3. An optional array of strings that represent DNS names for this instance. The values are added as Subject Alternative Names. +4. The filename to use for this instance. This name is used as the name of the directory that contains the instance’s files in the output. It is also used in the names of the files within the directory. This filename should not have an extension. Note: If the `name` provided for the instance does not represent a valid filename, then the `filename` field must be present. + + +When your YAML file is ready, you can use the `elasticsearch-certgen` command to generate certificates or certificate signing requests. Simply use the `-in` parameter to specify the location of the file. For example: + +```sh +bin/elasticsearch-certgen -in instances.yml +``` + +This command generates a CA certificate and private key as well as certificates and private keys for the instances that are listed in the YAML file. 
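Silent mode can also be combined with the other parameters described above. For example, the following invocation (file names and values are illustrative) writes the output to a specific zip file and adjusts the key size and certificate lifetime:

```sh
bin/elasticsearch-certgen --in instances.yml --out certificate-bundle.zip --keysize 4096 --days 730
```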
+ diff --git a/docs/reference/elasticsearch/command-line-tools/certutil.md b/docs/reference/elasticsearch/command-line-tools/certutil.md new file mode 100644 index 0000000000000..e01540f26dc17 --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/certutil.md @@ -0,0 +1,230 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/certutil.html +--- + +# elasticsearch-certutil [certutil] + +The `elasticsearch-certutil` command simplifies the creation of certificates for use with Transport Layer Security (TLS) in the {{stack}}. + + +## Synopsis [_synopsis_2] + +```shell +bin/elasticsearch-certutil +( +(ca [--ca-dn ] [--days ] [--pem]) + +| (cert ([--ca ] | [--ca-cert --ca-key ]) +[--ca-dn ] [--ca-pass ] [--days ] +[--dns ] [--in ] [--ip ] +[--multiple] [--name ] [--pem] [--self-signed]) + +| (csr [--dns ] [--in ] [--ip ] +[--name ]) + +[-E ] [--keysize ] [--out ] +[--pass ] +) + +| http + +[-h, --help] ([-s, --silent] | [-v, --verbose]) +``` + + +## Description [_description_9] + +You can specify one of the following modes: `ca`, `cert`, `csr`, `http`. The `elasticsearch-certutil` command also supports a silent mode of operation to enable easier batch operations. + + +### CA mode [certutil-ca] + +The `ca` mode generates a new certificate authority (CA). By default, it produces a single PKCS#12 output file, which holds the CA certificate and the private key for the CA. If you specify the `--pem` parameter, the command generates a zip file, which contains the certificate and private key in PEM format. + +You can subsequently use these files as input for the `cert` mode of the command. + + +### CERT mode [certutil-cert] + +The `cert` mode generates X.509 certificates and private keys. By default, it produces a single certificate and key for use on a single instance. + +To generate certificates and keys for multiple instances, specify the `--multiple` parameter, which prompts you for details about each instance. Alternatively, you can use the `--in` parameter to specify a YAML file that contains details about the instances. + +An instance is any piece of the Elastic Stack that requires a TLS or SSL certificate. Depending on your configuration, {{es}}, Logstash, {{kib}}, and Beats might all require a certificate and private key. The minimum required information for an instance is its name, which is used as the common name for the certificate. The instance name can be a hostname value or a full distinguished name. If the instance name would result in an invalid file or directory name, you must also specify a file name in the `--name` command parameter or in the `filename` field in an input YAML file. + +You can optionally provide IP addresses or DNS names for each instance. If neither IP addresses nor DNS names are specified, the Elastic Stack products cannot perform hostname verification and you might need to configure the `verification_mode` security setting to `certificate` only. For more information about this setting, see [Security settings](/reference/elasticsearch/configuration-reference/security-settings.md). + +All certificates that are generated by this command are signed by a CA unless the `--self-signed` parameter is specified. You must provide your own CA with the `--ca` or `--ca-cert` and `--ca-key` parameters unless `--self-signed` is specified. For more information about generating a CA, see the [CA mode of this command](#certutil-ca). To generate self-signed certificates, use the `--self-signed` parameter. 
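For example, a quick self-signed certificate for a single test instance might be generated as follows. The instance name and DNS value are placeholders, and `--self-signed` is not recommended for securing a cluster, as noted under the parameter description below:

```sh
bin/elasticsearch-certutil cert --self-signed --name node1 --dns node1.example.com --pem
```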
+ +By default, the `cert` mode produces a single PKCS#12 output file which holds the instance certificate, the instance private key, and the CA certificate. If you specify the `--pem` parameter, the command generates PEM formatted certificates and keys and packages them into a zip file. If you specify the `--multiple` or `--in` parameters, the command produces a zip file containing the generated certificates and keys. + + +### CSR mode [certutil-csr] + +The `csr` mode generates certificate signing requests (CSRs) that you can send to a trusted certificate authority to obtain signed certificates. The signed certificates must be in PEM or PKCS#12 format to work with {{es}} {{security-features}}. + +By default, the command produces a single CSR for a single instance. + +To generate CSRs for multiple instances, specify the `--multiple` parameter, which prompts you for details about each instance. Alternatively, you can use the `--in` parameter to specify a YAML file that contains details about the instances. + +The `csr` mode produces a single zip file which contains the CSRs and the private keys for each instance. Each CSR is provided as a standard PEM encoding of a PKCS#10 CSR. Each key is provided as a PEM encoding of an RSA private key. + + +### HTTP mode [certutil-http] + +The `http` mode guides you through the process of generating certificates for use on the HTTP (REST) interface for {{es}}. It asks you a number of questions in order to generate the right set of files for your needs. For example, depending on your choices, it might generate a zip file that contains a certificate authority (CA), a certificate signing request (CSR), or certificates and keys for use in {{es}} and {{kib}}. Each folder in the zip file contains a readme that explains how to use the files. + + +## Parameters [certutil-parameters] + +`ca` +: Specifies to generate a new local certificate authority (CA). This parameter cannot be used with the `csr`, `cert` or `http` parameters. + +`cert` +: Specifies to generate new X.509 certificates and keys. This parameter cannot be used with the `csr`, `ca` or `http` parameters. + +`csr` +: Specifies to generate certificate signing requests. This parameter cannot be used with the `ca`, `cert` or `http` parameters. + +`http` +: Generates a new certificate or certificate request for the {{es}} HTTP interface. This parameter cannot be used with the `ca`, `cert` or `csr` parameters. + +`--ca ` +: Specifies the path to an existing CA key pair (in PKCS#12 format). This parameter is only applicable to the `cert` parameter. + +`--ca-cert ` +: Specifies the path to an existing CA certificate (in PEM format). You must also specify the `--ca-key` parameter. The `--ca-cert` parameter is only applicable to the `cert` parameter. + +`--ca-dn ` +: Defines the *Distinguished Name* (DN) that is used for the generated CA certificate. The default value is `CN=Elastic Certificate Tool Autogenerated CA`. This parameter cannot be used with the `csr` or `http` parameters. + +`--ca-key ` +: Specifies the path to an existing CA private key (in PEM format). You must also specify the `--ca-cert` parameter. The `--ca-key` parameter is only applicable to the `cert` parameter. + +`--ca-pass ` +: Specifies the password for an existing CA private key or the generated CA private key. This parameter is only applicable to the `cert` parameter + +`--days ` +: Specifies an integer value that represents the number of days the generated certificates are valid. The default value is `1095`. 
This parameter cannot be used with the `csr` or `http` parameters. + +`--dns ` +: Specifies a comma-separated list of DNS names. This parameter cannot be used with the `ca` or `http` parameters. + +`-E ` +: Configures a setting. + +`-h, --help` +: Returns all of the command parameters. + +`--in ` +: Specifies the file that is used to run in silent mode. The input file must be a YAML file. This parameter cannot be used with the `ca` or `http` parameters. + +`--ip ` +: Specifies a comma-separated list of IP addresses. This parameter cannot be used with the `ca` or `http` parameters. + +`--keysize ` +: Defines the number of bits that are used in generated RSA keys. The default value is `2048`. This parameter cannot be used with the `http` parameter. + +`--multiple` +: Specifies to generate files for multiple instances. This parameter cannot be used with the `ca` or `http` parameters. + +`--name ` +: Specifies the name of the generated certificate. This parameter cannot be used with the `ca` or `http` parameters. + +`--out ` +: Specifies a path for the output files. This parameter cannot be used with the `http` parameter. + +`--pass ` +: Specifies the password for the generated private keys. This parameter cannot be used with the `http` parameters. + + Keys stored in PKCS#12 format are always password protected, however, this password may be *blank*. If you want to specify a blank password without a prompt, use `--pass ""` (with no `=`) on the command line. + + Keys stored in PEM format are password protected only if the `--pass` parameter is specified. If you do not supply an argument for the `--pass` parameter, you are prompted for a password. Encrypted PEM files do not support blank passwords (if you do not wish to password-protect your PEM keys, then do not specify `--pass`). + + +`--pem` +: Generates certificates and keys in PEM format instead of PKCS#12. This parameter cannot be used with the `csr` or `http` parameters. + +`--self-signed` +: Generates self-signed certificates. This parameter is only applicable to the `cert` parameter. + + ::::{note} + This option is not recommended for [setting up TLS on a cluster](docs-content://deploy-manage/security/set-up-basic-security.md#encrypt-internode-communication). In fact, a self-signed certificate should be used only when you can be sure that a CA is definitely not needed and trust is directly given to the certificate itself. + :::: + + +`-s, --silent` +: Shows minimal output. + +`-v, --verbose` +: Shows verbose output. + + +## Examples [_examples_15] + +The following command generates a CA certificate and private key in PKCS#12 format: + +```sh +bin/elasticsearch-certutil ca +``` + +You are prompted for an output filename and a password. Alternatively, you can specify the `--out` and `--pass` parameters. + +You can then generate X.509 certificates and private keys by using the new CA. For example: + +```sh +bin/elasticsearch-certutil cert --ca elastic-stack-ca.p12 +``` + +You are prompted for the CA password and for an output filename and password. Alternatively, you can specify the `--ca-pass`, `--out`, and `--pass` parameters. + +By default, this command generates a file called `elastic-certificates.p12`, which you can copy to the relevant configuration directory for each Elastic product that you want to configure. For more information, see [Encrypt internode communications with TLS](docs-content://deploy-manage/security/set-up-basic-security.md#encrypt-internode-communication). 
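If a product requires PEM files, or if you need the certificate to carry explicit hostnames and IP addresses for hostname verification, you can pass the relevant parameters to `cert` mode. The name, hostname, and address below are placeholders:

```sh
bin/elasticsearch-certutil cert --ca elastic-stack-ca.p12 \
  --name node1 --dns node1.example.com --ip 192.0.2.1 --pem
```

Because `--pem` is specified, the output is a zip file containing the certificate and key in PEM format rather than a single PKCS#12 file.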
+ + +### Using `elasticsearch-certutil` in Silent Mode [certutil-silent] + +To use the silent mode of operation, you must create a YAML file that contains information about the instances. It must match the following format: + +```yaml +instances: + - name: "node1" <1> + ip: <2> + - "192.0.2.1" + dns: <3> + - "node1.mydomain.com" + - name: "node2" + ip: + - "192.0.2.2" + - "198.51.100.1" + - name: "node3" + - name: "node4" + dns: + - "node4.mydomain.com" + - "node4.internal" + - name: "CN=node5,OU=IT,DC=mydomain,DC=com" + filename: "node5" <4> +``` + +1. The name of the instance. This can be a simple string value or can be a Distinguished Name (DN). This is the only required field. +2. An optional array of strings that represent IP Addresses for this instance. Both IPv4 and IPv6 values are allowed. The values are added as Subject Alternative Names. +3. An optional array of strings that represent DNS names for this instance. The values are added as Subject Alternative Names. +4. The filename to use for this instance. This name is used as the name of the directory that contains the instance’s files in the output. It is also used in the names of the files within the directory. This filename should not have an extension. Note: If the `name` provided for the instance does not represent a valid filename, then the `filename` field must be present. + + +When your YAML file is ready, you can use the `elasticsearch-certutil` command to generate certificates or certificate signing requests. Simply use the `--in` parameter to specify the location of the file. For example: + +```sh +bin/elasticsearch-certutil cert --silent --in instances.yml --out test1.zip --pass testpassword --ca elastic-stack-ca.p12 +``` + +This command generates a compressed `test1.zip` file. After you decompress the output file, there is a directory for each instance that was listed in the `instances.yml` file. Each instance directory contains a single PKCS#12 (`.p12`) file, which contains the instance certificate, instance private key, and CA certificate. + +You can also use the YAML file to generate certificate signing requests. For example: + +```sh +bin/elasticsearch-certutil csr --silent --in instances.yml --out test2.zip --pass testpassword +``` + +This command generates a compressed file, which contains a directory for each instance. Each instance directory contains a certificate signing request (`*.csr` file) and private key (`*.key` file). + diff --git a/docs/reference/elasticsearch/command-line-tools/create-enrollment-token.md b/docs/reference/elasticsearch/command-line-tools/create-enrollment-token.md new file mode 100644 index 0000000000000..3eec447a45ef7 --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/create-enrollment-token.md @@ -0,0 +1,67 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/create-enrollment-token.html +--- + +# elasticsearch-create-enrollment-token [create-enrollment-token] + +The `elasticsearch-create-enrollment-token` command creates enrollment tokens for {{es}} nodes and {{kib}} instances. + + +## Synopsis [_synopsis_3] + +```shell +bin/elasticsearch-create-enrollment-token +[-f, --force] [-h, --help] [-E ] [-s, --scope] [--url] +``` + + +## Description [_description_10] + +::::{note} +`elasticsearch-create-enrollment-token` can only be used with {{es}} clusters that have been [auto-configured for security](docs-content://deploy-manage/deploy/self-managed/installing-elasticsearch.md). 
+:::: + + +Use this command to create enrollment tokens, which you can use to enroll new {{es}} nodes to an existing cluster or configure {{kib}} instances to communicate with an existing {{es}} cluster that has security features enabled. The command generates (and subsequently removes) a temporary user in the [file realm](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/file-based.md) to run the request that creates enrollment tokens. + +::::{important} +You cannot use this tool if the file realm is disabled in your `elasticsearch.yml` file. +:::: + + +This command uses an HTTP connection to connect to the cluster and run the user management requests. The command automatically attempts to establish the connection over HTTPS by using the `xpack.security.http.ssl` settings in the `elasticsearch.yml` file. If you do not use the default configuration directory, ensure that the `ES_PATH_CONF` environment variable returns the correct path before you run the `elasticsearch-create-enrollment-token` command. You can override settings in your `elasticsearch.yml` file by using the `-E` command option. For more information about debugging connection failures, see [Setup-passwords command fails due to connection failure](docs-content://troubleshoot/elasticsearch/security/trb-security-setup.md). + + +## Parameters [create-enrollment-token-parameters] + +`-E ` +: Configures a standard {{es}} or {{xpack}} setting. + +`-f, --force` +: Forces the command to run against an unhealthy cluster. + +`-h, --help` +: Returns all of the command parameters. + +`-s, --scope` +: Specifies the scope of the generated token. Supported values are `node` and `kibana`. + +`--url` +: Specifies the base URL (hostname and port of the local node) that the tool uses to submit API requests to {{es}}. The default value is determined from the settings in your `elasticsearch.yml` file. If `xpack.security.http.ssl.enabled` is set to `true`, you must specify an HTTPS URL. + + +## Examples [_examples_16] + +The following command creates an enrollment token for enrolling an {{es}} node into a cluster: + +```shell +bin/elasticsearch-create-enrollment-token -s node +``` + +The following command creates an enrollment token for enrolling a {{kib}} instance into a cluster. The specified URL indicates where the elasticsearch-create-enrollment-token tool attempts to reach the local {{es}} node: + +```shell +bin/elasticsearch-create-enrollment-token -s kibana --url "https://172.0.0.3:9200" +``` + diff --git a/docs/reference/elasticsearch/command-line-tools/elasticsearch-croneval.md b/docs/reference/elasticsearch/command-line-tools/elasticsearch-croneval.md new file mode 100644 index 0000000000000..a984bbf7979d2 --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/elasticsearch-croneval.md @@ -0,0 +1,52 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/elasticsearch-croneval.html +--- + +# elasticsearch-croneval [elasticsearch-croneval] + +Validates and evaluates a [cron expression](/reference/elasticsearch/rest-apis/api-conventions.md#api-cron-expressions). + + +## Synopsis [_synopsis_4] + +```shell +bin/elasticsearch-croneval +[-c, --count ] [-h, --help] +([-s, --silent] | [-v, --verbose]) +``` + + +## Description [_description_11] + +This command enables you to verify that your cron expressions are valid for use with {{es}} and produce the expected results. + +This command is provided in the `$ES_HOME/bin` directory. 
+ + +## Parameters [elasticsearch-croneval-parameters] + +`-c, --count` +: The number of future times this expression will be triggered. The default value is `10`. + +`-d, --detail` +: Shows detail for invalid cron expression. It will print the stacktrace if the expression is not valid. + +`-h, --help` +: Returns all of the command parameters. + +`-s, --silent` +: Shows minimal output. + +`-v, --verbose` +: Shows verbose output. + + +## Example [_example_11] + +If the cron expression is valid, the following command displays the next 20 times that the schedule will be triggered: + +```bash +bin/elasticsearch-croneval "0 0/1 * * * ?" -c 20 +``` + diff --git a/docs/reference/elasticsearch/command-line-tools/elasticsearch-keystore.md b/docs/reference/elasticsearch/command-line-tools/elasticsearch-keystore.md new file mode 100644 index 0000000000000..353c66d7a9b91 --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/elasticsearch-keystore.md @@ -0,0 +1,219 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/elasticsearch-keystore.html +--- + +# elasticsearch-keystore [elasticsearch-keystore] + +The `elasticsearch-keystore` command manages [secure settings](docs-content://deploy-manage/security/secure-settings.md) in the {{es}} keystore. + + +## Synopsis [elasticsearch-keystore-synopsis] + +```shell +bin/elasticsearch-keystore +( [add ] [-f] [--stdin] +| [add-file ( )+] +| [create] [-p] +| [has-passwd] +| [list] +| [passwd] +| [remove ] +| [show [-o ] ] +| [upgrade] +) [-h, --help] ([-s, --silent] | [-v, --verbose]) +``` + + +## Description [elasticsearch-keystore-description] + +::::{important} +This command should be run as the user that will run {{es}}. +:::: + + +Currently, all secure settings are node-specific settings that must have the same value on every node. Therefore you must run this command on every node. + +When the keystore is password-protected, you must supply the password each time {{es}} starts. + +Modifications to the keystore are not automatically applied to the running {{es}} node. Any changes to the keystore will take effect when you restart {{es}}. Some secure settings can be explicitly [reloaded](docs-content://deploy-manage/security/secure-settings.md#reloadable-secure-settings) without restart. + +Only some settings are designed to be read from the keystore. However, there is no validation to block unsupported settings from the keystore and they can cause {{es}} to fail to start. To see whether a setting is supported in the keystore, see the setting reference. + + +## Parameters [elasticsearch-keystore-parameters] + +`add ` +: Adds settings to the keystore. Multiple setting names can be specified as arguments to the `add` command. By default, you are prompted for the values of the settings. If the keystore is password protected, you are also prompted to enter the password. If a setting already exists in the keystore, you must confirm that you want to overwrite the current value. If the keystore does not exist, you must confirm that you want to create a keystore. To avoid these two confirmation prompts, use the `-f` parameter. + +`add-file ( )+` +: Adds files to the keystore. + +`create` +: Creates the keystore. + +`-f, --force` +: When used with the `add` parameter, the command no longer prompts you before overwriting existing entries in the keystore. Also, if you haven’t created a keystore yet, it creates a keystore that is obfuscated but not password protected. 
+ +`-h, --help` +: Returns all of the command parameters. + +`has-passwd` +: Returns a success message if the keystore exists and is password-protected. Otherwise, the command fails with exit code 1 and returns an error message. + +`list` +: Lists the settings in the keystore. If the keystore is password protected, you are prompted to enter the password. + +`-p` +: When used with the `create` parameter, the command prompts you to enter a keystore password. If you don’t specify the `-p` flag or if you enter an empty password, the keystore is obfuscated but not password protected. + +`passwd` +: Changes or sets the keystore password. If the keystore is password protected, you are prompted to enter the current password and the new one. You can optionally use an empty string to remove the password. If the keystore is not password protected, you can use this command to set a password. + +`remove ` +: Removes settings from the keystore. Multiple setting names can be specified as arguments to the `remove` command. + +`show ` +: Displays the value of a single setting in the keystore. Pass the `-o` (or `--output`) parameter to write the setting to a file. If writing to the standard output (the terminal) the setting’s value is always interpreted as a UTF-8 string. If the setting contains binary data (for example for data that was added via the `add-file` command), always use the `-o` option to write to a file. + +`-s, --silent` +: Shows minimal output. + +`-x, --stdin` +: When used with the `add` parameter, you can pass the settings values through standard input (stdin). Separate multiple values with carriage returns or newlines. See [Add settings to the keystore](#add-string-to-keystore). + +`upgrade` +: Upgrades the internal format of the keystore. + +`-v, --verbose` +: Shows verbose output. + + +## Examples [elasticsearch-keystore-examples] + + +### Create the keystore [creating-keystore] + +To create the `elasticsearch.keystore`, use the `create` command: + +```sh +bin/elasticsearch-keystore create -p +``` + +You are prompted to enter the keystore password. A password-protected `elasticsearch.keystore` file is created alongside the `elasticsearch.yml` file. + + +### Change the password of the keystore [changing-keystore-password] + +To change the password of the `elasticsearch.keystore`, use the `passwd` command: + +```sh +bin/elasticsearch-keystore passwd +``` + +If the {{es}} keystore is password protected, you are prompted to enter the current password and then enter the new one. If it is not password protected, you are prompted to set a password. + + +### List settings in the keystore [list-settings] + +To list the settings in the keystore, use the `list` command. + +```sh +bin/elasticsearch-keystore list +``` + +If the {{es}} keystore is password protected, you are prompted to enter the password. + + +### Add settings to the keystore [add-string-to-keystore] + +Sensitive string settings, like authentication credentials for Cloud plugins, can be added with the `add` command: + +```sh +bin/elasticsearch-keystore add the.setting.name.to.set +``` + +You are prompted to enter the value of the setting. If the {{es}} keystore is password protected, you are also prompted to enter the password. + +You can also add multiple settings with the `add` command: + +```sh +bin/elasticsearch-keystore add \ + the.setting.name.to.set \ + the.other.setting.name.to.set +``` + +You are prompted to enter the values of the settings. 
If the {{es}} keystore is password protected, you are also prompted to enter the password. + +To pass the settings values through standard input (stdin), use the `--stdin` flag: + +```sh +cat /file/containing/setting/value | bin/elasticsearch-keystore add --stdin the.setting.name.to.set +``` + +Values for multiple settings must be separated by carriage returns or newlines. + + +### Add files to the keystore [add-file-to-keystore] + +You can add sensitive files, like authentication key files for Cloud plugins, using the `add-file` command. Settings and file paths are specified in pairs consisting of `setting path`. The value of the setting will be the binary contents of the file path at the time the file is added to the keystore. + +```sh +bin/elasticsearch-keystore add-file the.setting.name.to.set /path/example-file.json +``` + +You can add multiple files with the `add-file` command: + +```sh +bin/elasticsearch-keystore add-file \ + the.setting.name.to.set /path/example-file.json \ + the.other.setting.name.to.set /path/other-example-file.json +``` + +If the {{es}} keystore is password protected, you are prompted to enter the password. + + +### Show settings in the keystore [show-keystore-value] + +To display the value of a setting in the keystore use the `show` command: + +```sh +bin/elasticsearch-keystore show the.name.of.the.setting.to.show +``` + +If the setting contains binary data you should write it to a file with the `-o` (or `--output`) option: + +```sh +bin/elasticsearch-keystore show -o my_file binary.setting.name +``` + +If the {{es}} keystore is password protected, you are prompted to enter the password. + + +### Remove settings from the keystore [remove-settings] + +To remove a setting from the keystore, use the `remove` command: + +```sh +bin/elasticsearch-keystore remove the.setting.name.to.remove +``` + +You can also remove multiple settings with the `remove` command: + +```sh +bin/elasticsearch-keystore remove \ + the.setting.name.to.remove \ + the.other.setting.name.to.remove +``` + +If the {{es}} keystore is password protected, you are prompted to enter the password. + + +### Upgrade the keystore [keystore-upgrade] + +Occasionally, the internal format of the keystore changes. When {{es}} is installed from a package manager, an upgrade of the on-disk keystore to the new format is done during package upgrade. In other cases, {{es}} performs the upgrade during node startup. This requires that {{es}} has write permissions to the directory that contains the keystore. 
Alternatively, you can manually perform such an upgrade by using the `upgrade` command: + +```sh +bin/elasticsearch-keystore upgrade +``` + diff --git a/docs/reference/elasticsearch/command-line-tools/index.md b/docs/reference/elasticsearch/command-line-tools/index.md new file mode 100644 index 0000000000000..62bd9a731da84 --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/index.md @@ -0,0 +1,23 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/commands.html +--- + +# Command line tools [commands] + +{{es}} provides the following tools for configuring security and performing other tasks from the command line: + +* [*elasticsearch-certgen*](/reference/elasticsearch/command-line-tools/certgen.md) +* [*elasticsearch-certutil*](/reference/elasticsearch/command-line-tools/certutil.md) +* [*elasticsearch-create-enrollment-token*](/reference/elasticsearch/command-line-tools/create-enrollment-token.md) +* [*elasticsearch-croneval*](/reference/elasticsearch/command-line-tools/elasticsearch-croneval.md) +* [*elasticsearch-keystore*](/reference/elasticsearch/command-line-tools/elasticsearch-keystore.md) +* [*elasticsearch-node*](/reference/elasticsearch/command-line-tools/node-tool.md) +* [*elasticsearch-reconfigure-node*](/reference/elasticsearch/command-line-tools/reconfigure-node.md) +* [*elasticsearch-reset-password*](/reference/elasticsearch/command-line-tools/reset-password.md) +* [*elasticsearch-saml-metadata*](/reference/elasticsearch/command-line-tools/saml-metadata.md) +* [*elasticsearch-setup-passwords*](/reference/elasticsearch/command-line-tools/setup-passwords.md) +* [*elasticsearch-shard*](/reference/elasticsearch/command-line-tools/shard-tool.md) +* [*elasticsearch-syskeygen*](/reference/elasticsearch/command-line-tools/syskeygen.md) +* [*elasticsearch-users*](/reference/elasticsearch/command-line-tools/users-command.md) + diff --git a/docs/reference/elasticsearch/command-line-tools/node-tool.md b/docs/reference/elasticsearch/command-line-tools/node-tool.md new file mode 100644 index 0000000000000..70e61a32054f5 --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/node-tool.md @@ -0,0 +1,469 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/node-tool.html +--- + +# elasticsearch-node [node-tool] + +The `elasticsearch-node` command enables you to perform certain unsafe operations on a node that are only possible while it is shut down. This command allows you to adjust the [role](/reference/elasticsearch/configuration-reference/node-settings.md) of a node, unsafely edit cluster settings and may be able to recover some data after a disaster or start a node even if it is incompatible with the data on disk. + + +## Synopsis [_synopsis_5] + +```shell +bin/elasticsearch-node repurpose|unsafe-bootstrap|detach-cluster|override-version + [-E ] + [-h, --help] ([-s, --silent] | [-v, --verbose]) +``` + + +## Description [_description_12] + +This tool has a number of modes: + +* `elasticsearch-node repurpose` can be used to delete unwanted data from a node if it used to be a [data node](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#data-node-role) or a [master-eligible node](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#master-node-role) but has been repurposed not to have one or other of these roles. 
+* `elasticsearch-node remove-settings` can be used to remove persistent settings from the cluster state in case where it contains incompatible settings that prevent the cluster from forming. +* `elasticsearch-node remove-index-settings` can be used to remove index settings from the cluster state in case where it contains incompatible index settings that prevent the cluster from forming. +* `elasticsearch-node remove-customs` can be used to remove custom metadata from the cluster state in case where it contains broken metadata that prevents the cluster state from being loaded. +* `elasticsearch-node unsafe-bootstrap` can be used to perform *unsafe cluster bootstrapping*. It forces one of the nodes to form a brand-new cluster on its own, using its local copy of the cluster metadata. +* `elasticsearch-node detach-cluster` enables you to move nodes from one cluster to another. This can be used to move nodes into a new cluster created with the `elasticsearch-node unsafe-bootstrap` command. If unsafe cluster bootstrapping was not possible, it also enables you to move nodes into a brand-new cluster. +* `elasticsearch-node override-version` enables you to start up a node even if the data in the data path was written by an incompatible version of {{es}}. This may sometimes allow you to downgrade to an earlier version of {{es}}. + +$$$cli-tool-jvm-options-node$$$ + + +### JVM options [_jvm_options] + +CLI tools run with 64MB of heap. For most tools, this value is fine. However, if needed this can be overridden by setting the `CLI_JAVA_OPTS` environment variable. For example, the following increases the heap size used by the `elasticsearch-node` tool to 1GB. + +```shell +export CLI_JAVA_OPTS="-Xmx1g" +bin/elasticsearch-node ... +``` + + +### Changing the role of a node [node-tool-repurpose] + +There may be situations where you want to repurpose a node without following the [proper repurposing processes](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#change-node-role). The `elasticsearch-node repurpose` tool allows you to delete any excess on-disk data and start a node after repurposing it. + +The intended use is: + +* Stop the node +* Update `elasticsearch.yml` by setting `node.roles` as desired. +* Run `elasticsearch-node repurpose` on the node +* Start the node + +If you run `elasticsearch-node repurpose` on a node without the `data` role and with the `master` role then it will delete any remaining shard data on that node, but it will leave the index and cluster metadata alone. If you run `elasticsearch-node repurpose` on a node without the `data` and `master` roles then it will delete any remaining shard data and index metadata, but it will leave the cluster metadata alone. + +::::{warning} +Running this command can lead to data loss for the indices mentioned if the data contained is not available on other nodes in the cluster. Only run this tool if you understand and accept the possible consequences, and only after determining that the node cannot be repurposed cleanly. +:::: + + +The tool provides a summary of the data to be deleted and asks for confirmation before making any changes. You can get detailed information about the affected indices and shards by passing the verbose (`-v`) option. + + +### Removing persistent cluster settings [_removing_persistent_cluster_settings] + +There may be situations where a node contains persistent cluster settings that prevent the cluster from forming. 
Since the cluster cannot form, it is not possible to remove these settings using the [Cluster update settings](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings) API. + +The `elasticsearch-node remove-settings` tool allows you to forcefully remove those persistent settings from the on-disk cluster state. The tool takes a list of settings as parameters that should be removed, and also supports wildcard patterns. + +The intended use is: + +* Stop the node +* Run `elasticsearch-node remove-settings name-of-setting-to-remove` on the node +* Repeat for all other master-eligible nodes +* Start the nodes + + +### Removing index settings [_removing_index_settings] + +There may be situations where an index contains index settings that prevent the cluster from forming. Since the cluster cannot form, it is not possible to remove these settings using the [Update index settings](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-settings) API. + +The `elasticsearch-node remove-index-settings` tool allows you to forcefully remove those index settings from the on-disk cluster state. The tool takes a list of index settings as parameters that should be removed, and also supports wildcard patterns. + +The intended use is: + +* Stop the node +* Run `elasticsearch-node remove-index-settings name-of-index-setting-to-remove` on the node +* Repeat for all nodes +* Start the nodes + + +### Removing custom metadata from the cluster state [_removing_custom_metadata_from_the_cluster_state] + +There may be situations where a node contains custom metadata, typically provided by plugins, that prevent the node from starting up and loading the cluster from disk. + +The `elasticsearch-node remove-customs` tool allows you to forcefully remove the problematic custom metadata. The tool takes a list of custom metadata names as parameters that should be removed, and also supports wildcard patterns. + +The intended use is: + +* Stop the node +* Run `elasticsearch-node remove-customs name-of-custom-to-remove` on the node +* Repeat for all other master-eligible nodes +* Start the nodes + + +### Recovering data after a disaster [_recovering_data_after_a_disaster] + +Sometimes {{es}} nodes are temporarily stopped, perhaps because of the need to perform some maintenance activity or perhaps because of a hardware failure. After you resolve the temporary condition and restart the node, it will rejoin the cluster and continue normally. Depending on your configuration, your cluster may be able to remain completely available even while one or more of its nodes are stopped. + +Sometimes it might not be possible to restart a node after it has stopped. For example, the node’s host may suffer from a hardware problem that cannot be repaired. If the cluster is still available then you can start up a fresh node on another host and {{es}} will bring this node into the cluster in place of the failed node. + +Each node stores its data in the data directories defined by the [`path.data` setting](docs-content://deploy-manage/deploy/self-managed/important-settings-configuration.md#path-settings). This means that in a disaster you can also restart a node by moving its data directories to another host, presuming that those data directories can be recovered from the faulty host. 
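+
+For illustration, here is a minimal sketch of such a relocation, assuming a Linux package install whose `path.data` points at the default `/var/lib/elasticsearch` directory; the replacement hostname and paths are hypothetical, and the node must stay stopped while its data directory is copied:
+
+```sh
+# Hypothetical example: move a stopped node's data directory to a replacement host.
+# Copy the directory referenced by path.data (here the package default) unchanged.
+rsync -a /var/lib/elasticsearch/ replacement-host:/var/lib/elasticsearch/
+
+# On the replacement host, restore ownership and start the node so it rejoins
+# the cluster using the recovered data directories.
+ssh replacement-host 'chown -R elasticsearch:elasticsearch /var/lib/elasticsearch && systemctl start elasticsearch'
+```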
+ +{{es}} [requires a response from a majority of the master-eligible nodes](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/modules-discovery-quorums.md) in order to elect a master and to update the cluster state. This means that if you have three master-eligible nodes then the cluster will remain available even if one of them has failed. However if two of the three master-eligible nodes fail then the cluster will be unavailable until at least one of them is restarted. + +In very rare circumstances it may not be possible to restart enough nodes to restore the cluster’s availability. If such a disaster occurs, you should build a new cluster from a recent snapshot and re-import any data that was ingested since that snapshot was taken. + +However, if the disaster is serious enough then it may not be possible to recover from a recent snapshot either. Unfortunately in this case there is no way forward that does not risk data loss, but it may be possible to use the `elasticsearch-node` tool to construct a new cluster that contains some of the data from the failed cluster. + + +### Bypassing version checks [node-tool-override-version] + +The data that {{es}} writes to disk is designed to be read by the current version and a limited set of future versions. It cannot generally be read by older versions, nor by versions that are more than one major version newer. The data stored on disk includes the version of the node that wrote it, and {{es}} checks that it is compatible with this version when starting up. + +In rare circumstances it may be desirable to bypass this check and start up an {{es}} node using data that was written by an incompatible version. This may not work if the format of the stored data has changed, and it is a risky process because it is possible for the format to change in ways that {{es}} may misinterpret, silently leading to data loss. + +To bypass this check, you can use the `elasticsearch-node override-version` tool to overwrite the version number stored in the data path with the current version, causing {{es}} to believe that it is compatible with the on-disk data. + + +#### Unsafe cluster bootstrapping [node-tool-unsafe-bootstrap] + +If there is at least one remaining master-eligible node, but it is not possible to restart a majority of them, then the `elasticsearch-node unsafe-bootstrap` command will unsafely override the cluster’s [voting configuration](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/modules-discovery-voting.md) as if performing another [cluster bootstrapping process](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/modules-discovery-bootstrap-cluster.md). The target node can then form a new cluster on its own by using the cluster metadata held locally on the target node. + +::::{warning} +These steps can lead to arbitrary data loss since the target node may not hold the latest cluster metadata, and this out-of-date metadata may make it impossible to use some or all of the indices in the cluster. +:::: + + +Since unsafe bootstrapping forms a new cluster containing a single node, once you have run it you must use the [`elasticsearch-node detach-cluster` tool](#node-tool-detach-cluster) to migrate any other surviving nodes from the failed cluster into this new cluster. + +When you run the `elasticsearch-node unsafe-bootstrap` tool it will analyse the state of the node and ask for confirmation before taking any action. 
Before asking for confirmation it reports the term and version of the cluster state on the node on which it runs as follows: + +```txt +Current node cluster state (term, version) pair is (4, 12) +``` + +If you have a choice of nodes on which to run this tool then you should choose one with a term that is as large as possible. If there is more than one node with the same term, pick the one with the largest version. This information identifies the node with the freshest cluster state, which minimizes the quantity of data that might be lost. For example, if the first node reports `(4, 12)` and a second node reports `(5, 3)`, then the second node is preferred since its term is larger. However if the second node reports `(3, 17)` then the first node is preferred since its term is larger. If the second node reports `(4, 10)` then it has the same term as the first node, but has a smaller version, so the first node is preferred. + +::::{warning} +Running this command can lead to arbitrary data loss. Only run this tool if you understand and accept the possible consequences and have exhausted all other possibilities for recovery of your cluster. +:::: + + +The sequence of operations for using this tool are as follows: + +1. Make sure you have really lost access to at least half of the master-eligible nodes in the cluster, and they cannot be repaired or recovered by moving their data paths to healthy hardware. +2. Stop **all** remaining nodes. +3. Choose one of the remaining master-eligible nodes to become the new elected master as described above. +4. On this node, run the `elasticsearch-node unsafe-bootstrap` command as shown below. Verify that the tool reported `Master node was successfully bootstrapped`. +5. Start this node and verify that it is elected as the master node. +6. Run the [`elasticsearch-node detach-cluster` tool](#node-tool-detach-cluster), described below, on every other node in the cluster. +7. Start all other nodes and verify that each one joins the cluster. +8. Investigate the data in the cluster to discover if any was lost during this process. + +When you run the tool it will make sure that the node that is being used to bootstrap the cluster is not running. It is important that all other master-eligible nodes are also stopped while this tool is running, but the tool does not check this. + +The message `Master node was successfully bootstrapped` does not mean that there has been no data loss, it just means that tool was able to complete its job. + + +#### Detaching nodes from their cluster [node-tool-detach-cluster] + +It is unsafe for nodes to move between clusters, because different clusters have completely different cluster metadata. There is no way to safely merge the metadata from two clusters together. + +To protect against inadvertently joining the wrong cluster, each cluster creates a unique identifier, known as the *cluster UUID*, when it first starts up. Every node records the UUID of its cluster and refuses to join a cluster with a different UUID. + +However, if a node’s cluster has permanently failed then it may be desirable to try and move it into a new cluster. The `elasticsearch-node detach-cluster` command lets you detach a node from its cluster by resetting its cluster UUID. It can then join another cluster with a different UUID. + +For example, after unsafe cluster bootstrapping you will need to detach all the other surviving nodes from their old cluster so they can join the new, unsafely-bootstrapped cluster. 
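+
+As a condensed sketch of that ordering (all nodes must be stopped first, and the interactive prompts and warnings shown in the examples later on this page still apply):
+
+```sh
+# On the surviving master-eligible node chosen to seed the new cluster:
+bin/elasticsearch-node unsafe-bootstrap
+
+# Then, on every other surviving node, reset its cluster UUID so it can join the
+# unsafely-bootstrapped cluster instead of looking for the old one:
+bin/elasticsearch-node detach-cluster
+```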
+ +Unsafe cluster bootstrapping is only possible if there is at least one surviving master-eligible node. If there are no remaining master-eligible nodes then the cluster metadata is completely lost. However, the individual data nodes also contain a copy of the index metadata corresponding with their shards. This sometimes allows a new cluster to import these shards as [dangling indices](/reference/elasticsearch/configuration-reference/local-gateway.md#dangling-indices). You can sometimes recover some indices after the loss of all main-eligible nodes in a cluster by creating a new cluster and then using the `elasticsearch-node detach-cluster` command to move any surviving nodes into this new cluster. Once the new cluster is fully formed, use the [Dangling indices API](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-indices) to list, import or delete any dangling indices. + +There is a risk of data loss when importing a dangling index because data nodes may not have the most recent copy of the index metadata and do not have any information about [which shard copies are in-sync](docs-content://deploy-manage/distributed-architecture/reading-and-writing-documents.md). This means that a stale shard copy may be selected to be the primary, and some of the shards may be incompatible with the imported mapping. + +::::{warning} +Execution of this command can lead to arbitrary data loss. Only run this tool if you understand and accept the possible consequences and have exhausted all other possibilities for recovery of your cluster. +:::: + + +The sequence of operations for using this tool are as follows: + +1. Make sure you have really lost access to every one of the master-eligible nodes in the cluster, and they cannot be repaired or recovered by moving their data paths to healthy hardware. +2. Start a new cluster and verify that it is healthy. This cluster may comprise one or more brand-new master-eligible nodes, or may be an unsafely-bootstrapped cluster formed as described above. +3. Stop **all** remaining data nodes. +4. On each data node, run the `elasticsearch-node detach-cluster` tool as shown below. Verify that the tool reported `Node was successfully detached from the cluster`. +5. If necessary, configure each data node to [discover the new cluster](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/discovery-hosts-providers.md). +6. Start each data node and verify that it has joined the new cluster. +7. Wait for all recoveries to have completed, and investigate the data in the cluster to discover if any was lost during this process. Use the [Dangling indices API](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-indices) to list, import or delete any dangling indices. + +The message `Node was successfully detached from the cluster` does not mean that there has been no data loss, it just means that tool was able to complete its job. + + +## Parameters [node-tool-parameters] + +`repurpose` +: Delete excess data when a node’s roles are changed. + +`unsafe-bootstrap` +: Specifies to unsafely bootstrap this node as a new one-node cluster. + +`detach-cluster` +: Specifies to unsafely detach this node from its cluster so it can join a different cluster. + +`override-version` +: Overwrites the version number stored in the data path so that a node can start despite being incompatible with the on-disk data. + +`remove-settings` +: Forcefully removes the provided persistent cluster settings from the on-disk cluster state. 
+ +`-E ` +: Configures a setting. + +`-h, --help` +: Returns all of the command parameters. + +`-s, --silent` +: Shows minimal output. + +`-v, --verbose` +: Shows verbose output. + + +## Examples [_examples_17] + + +### Repurposing a node as a dedicated master node [_repurposing_a_node_as_a_dedicated_master_node] + +In this example, a former data node is repurposed as a dedicated master node. First update the node’s settings to `node.roles: [ "master" ]` in its `elasticsearch.yml` config file. Then run the `elasticsearch-node repurpose` command to find and remove excess shard data: + +```txt +node$ ./bin/elasticsearch-node repurpose + + WARNING: Elasticsearch MUST be stopped before running this tool. + +Found 2 shards in 2 indices to clean up +Use -v to see list of paths and indices affected +Node is being re-purposed as master and no-data. Clean-up of shard data will be performed. +Do you want to proceed? +Confirm [y/N] y +Node successfully repurposed to master and no-data. +``` + + +### Repurposing a node as a coordinating-only node [_repurposing_a_node_as_a_coordinating_only_node] + +In this example, a node that previously held data is repurposed as a coordinating-only node. First update the node’s settings to `node.roles: []` in its `elasticsearch.yml` config file. Then run the `elasticsearch-node repurpose` command to find and remove excess shard data and index metadata: + +```txt +node$./bin/elasticsearch-node repurpose + + WARNING: Elasticsearch MUST be stopped before running this tool. + +Found 2 indices (2 shards and 2 index meta data) to clean up +Use -v to see list of paths and indices affected +Node is being re-purposed as no-master and no-data. Clean-up of index data will be performed. +Do you want to proceed? +Confirm [y/N] y +Node successfully repurposed to no-master and no-data. +``` + + +### Removing persistent cluster settings [_removing_persistent_cluster_settings_2] + +If your nodes contain persistent cluster settings that prevent the cluster from forming, i.e., can’t be removed using the [Cluster update settings](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings) API, you can run the following commands to remove one or more cluster settings. + +```txt +node$ ./bin/elasticsearch-node remove-settings xpack.monitoring.exporters.my_exporter.host + + WARNING: Elasticsearch MUST be stopped before running this tool. + +The following settings will be removed: +xpack.monitoring.exporters.my_exporter.host: "10.1.2.3" + +You should only run this tool if you have incompatible settings in the +cluster state that prevent the cluster from forming. +This tool can cause data loss and its use should be your last resort. + +Do you want to proceed? + +Confirm [y/N] y + +Settings were successfully removed from the cluster state +``` + +You can also use wildcards to remove multiple settings, for example using + +```txt +node$ ./bin/elasticsearch-node remove-settings xpack.monitoring.* +``` + + +### Removing index settings [_removing_index_settings_2] + +If your indices contain index settings that prevent the cluster from forming, you can run the following command to remove one or more index settings. + +```txt +node$ ./bin/elasticsearch-node remove-index-settings index.my_plugin.foo + + WARNING: Elasticsearch MUST be stopped before running this tool. + +You should only run this tool if you have incompatible index settings in the +cluster state that prevent the cluster from forming. 
+This tool can cause data loss and its use should be your last resort. + +Do you want to proceed? + +Confirm [y/N] y + +Index settings were successfully removed from the cluster state +``` + +You can also use wildcards to remove multiple index settings, for example using + +```txt +node$ ./bin/elasticsearch-node remove-index-settings index.my_plugin.* +``` + + +### Removing custom metadata from the cluster state [_removing_custom_metadata_from_the_cluster_state_2] + +If the on-disk cluster state contains custom metadata that prevents the node from starting up and loading the cluster state, you can run the following commands to remove this custom metadata. + +```txt +node$ ./bin/elasticsearch-node remove-customs snapshot_lifecycle + + WARNING: Elasticsearch MUST be stopped before running this tool. + +The following customs will be removed: +snapshot_lifecycle + +You should only run this tool if you have broken custom metadata in the +cluster state that prevents the cluster state from being loaded. +This tool can cause data loss and its use should be your last resort. + +Do you want to proceed? + +Confirm [y/N] y + +Customs were successfully removed from the cluster state +``` + + +### Unsafe cluster bootstrapping [_unsafe_cluster_bootstrapping] + +Suppose your cluster had five master-eligible nodes and you have permanently lost three of them, leaving two nodes remaining. + +* Run the tool on the first remaining node, but answer `n` at the confirmation step. + +```txt +node_1$ ./bin/elasticsearch-node unsafe-bootstrap + + WARNING: Elasticsearch MUST be stopped before running this tool. + +Current node cluster state (term, version) pair is (4, 12) + +You should only run this tool if you have permanently lost half or more +of the master-eligible nodes in this cluster, and you cannot restore the +cluster from a snapshot. This tool can cause arbitrary data loss and its +use should be your last resort. If you have multiple surviving master +eligible nodes, you should run this tool on the node with the highest +cluster state (term, version) pair. + +Do you want to proceed? + +Confirm [y/N] n +``` + +* Run the tool on the second remaining node, and again answer `n` at the confirmation step. + +```txt +node_2$ ./bin/elasticsearch-node unsafe-bootstrap + + WARNING: Elasticsearch MUST be stopped before running this tool. + +Current node cluster state (term, version) pair is (5, 3) + +You should only run this tool if you have permanently lost half or more +of the master-eligible nodes in this cluster, and you cannot restore the +cluster from a snapshot. This tool can cause arbitrary data loss and its +use should be your last resort. If you have multiple surviving master +eligible nodes, you should run this tool on the node with the highest +cluster state (term, version) pair. + +Do you want to proceed? + +Confirm [y/N] n +``` + +* Since the second node has a greater term it has a fresher cluster state, so it is better to unsafely bootstrap the cluster using this node: + +```txt +node_2$ ./bin/elasticsearch-node unsafe-bootstrap + + WARNING: Elasticsearch MUST be stopped before running this tool. + +Current node cluster state (term, version) pair is (5, 3) + +You should only run this tool if you have permanently lost half or more +of the master-eligible nodes in this cluster, and you cannot restore the +cluster from a snapshot. This tool can cause arbitrary data loss and its +use should be your last resort. 
If you have multiple surviving master +eligible nodes, you should run this tool on the node with the highest +cluster state (term, version) pair. + +Do you want to proceed? + +Confirm [y/N] y +Master node was successfully bootstrapped +``` + + +### Detaching nodes from their cluster [_detaching_nodes_from_their_cluster] + +After unsafely bootstrapping a new cluster, run the `elasticsearch-node detach-cluster` command to detach all remaining nodes from the failed cluster so they can join the new cluster: + +```txt +node_3$ ./bin/elasticsearch-node detach-cluster + + WARNING: Elasticsearch MUST be stopped before running this tool. + +You should only run this tool if you have permanently lost all of the +master-eligible nodes in this cluster and you cannot restore the cluster +from a snapshot, or you have already unsafely bootstrapped a new cluster +by running `elasticsearch-node unsafe-bootstrap` on a master-eligible +node that belonged to the same cluster as this node. This tool can cause +arbitrary data loss and its use should be your last resort. + +Do you want to proceed? + +Confirm [y/N] y +Node was successfully detached from the cluster +``` + + +### Bypassing version checks [_bypassing_version_checks] + +Run the `elasticsearch-node override-version` command to overwrite the version stored in the data path so that a node can start despite being incompatible with the data stored in the data path: + +```txt +node$ ./bin/elasticsearch-node override-version + + WARNING: Elasticsearch MUST be stopped before running this tool. + +This data path was last written by Elasticsearch version [x.x.x] and may no +longer be compatible with Elasticsearch version [y.y.y]. This tool will bypass +this compatibility check, allowing a version [y.y.y] node to start on this data +path, but a version [y.y.y] node may not be able to read this data or may read +it incorrectly leading to data loss. + +You should not use this tool. Instead, continue to use a version [x.x.x] node +on this data path. If necessary, you can use reindex-from-remote to copy the +data from here into an older cluster. + +Do you want to proceed? + +Confirm [y/N] y +Successfully overwrote this node's metadata to bypass its version compatibility checks. +``` diff --git a/docs/reference/elasticsearch/command-line-tools/reconfigure-node.md b/docs/reference/elasticsearch/command-line-tools/reconfigure-node.md new file mode 100644 index 0000000000000..85510cbbc71f9 --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/reconfigure-node.md @@ -0,0 +1,71 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/reconfigure-node.html +--- + +# elasticsearch-reconfigure-node [reconfigure-node] + +The `elasticsearch-reconfigure-node` tool reconfigures an {{es}} node that was installed through an RPM or DEB package to join an existing cluster with security features enabled. + + +## Synopsis [_synopsis_6] + +```shell +bin/elasticsearch-reconfigure-node +[--enrollment-token] [-h, --help] [-E ] +[-s, --silent] [-v, --verbose] +``` + + +## Description [_description_13] + +When installing {{es}} with a DEB or RPM package, the current node is assumed to be the first node in the cluster. {{es}} enables and configures security features on the node, generates a password for the `elastic` superuser, and configures TLS for the HTTP and transport layers. + +Rather than form a single-node cluster, you can add a node to an existing cluster where security features are already enabled and configured. 
Before starting your new node, run the [`elasticsearch-create-enrollment-token`](/reference/elasticsearch/command-line-tools/create-enrollment-token.md) tool with the `-s node` option to generate an enrollment token on any node in your existing cluster. On your new node, run the `elasticsearch-reconfigure-node` tool and pass the enrollment token as a parameter.
+
+::::{note}
+This tool is intended only for use on DEB or RPM distributions of {{es}}.
+::::
+
+
+You must run this tool with `sudo` so that it can edit the necessary files in your {{es}} installation configuration directory that are owned by `root:elasticsearch`.
+
+
+## Parameters [reconfigure-node-parameters]
+
+`--enrollment-token`
+: The enrollment token, which can be generated on any of the nodes in an existing, secured cluster.
+
+`-E <KeyValuePair>`
+: Configures a standard {{es}} or {{xpack}} setting.
+
+`-h, --help`
+: Shows help information.
+
+`-s, --silent`
+: Shows minimal output.
+
+`-v, --verbose`
+: Shows verbose output.
+
+$$$cli-tool-jvm-options-reconfigure-node$$$
+
+
+### JVM options [_jvm_options_2]
+
+CLI tools run with 64MB of heap. For most tools, this value is fine. However, if needed this can be overridden by setting the `CLI_JAVA_OPTS` environment variable. For example, the following increases the heap size used by the `elasticsearch-reconfigure-node` tool to 1GB.
+
+```shell
+export CLI_JAVA_OPTS="-Xmx1g"
+bin/elasticsearch-reconfigure-node ...
+```
+
+
+## Examples [_examples_18]
+
+The following example reconfigures an installed {{es}} node so that it can join an existing cluster when it starts for the first time.
+
+```shell
+sudo /usr/share/elasticsearch/bin/elasticsearch-reconfigure-node --enrollment-token eyJ2ZXIiOiI4LjAuMCIsImFkciI6WyIxOTIuMTY4LjEuMTY6OTIwMCJdLCJmZ3IiOiI4NGVhYzkyMzAyMWQ1MjcyMmQxNTFhMTQwZmM2ODI5NmE5OWNiNmU0OGVhZjYwYWMxYzljM2I3ZDJjOTg2YTk3Iiwia2V5IjoiUy0yUjFINEJrNlFTMkNEY1dVV1g6QS0wSmJxM3hTRy1haWxoQTdPWVduZyJ9
+```
+
diff --git a/docs/reference/elasticsearch/command-line-tools/reset-password.md b/docs/reference/elasticsearch/command-line-tools/reset-password.md
new file mode 100644
index 0000000000000..bd95c5b1da5e3
--- /dev/null
+++ b/docs/reference/elasticsearch/command-line-tools/reset-password.md
@@ -0,0 +1,85 @@
+---
+mapped_pages:
+  - https://www.elastic.co/guide/en/elasticsearch/reference/current/reset-password.html
+---
+
+# elasticsearch-reset-password [reset-password]
+
+The `elasticsearch-reset-password` command resets the passwords of users in the native realm and built-in users.
+
+
+## Synopsis [_synopsis_7]
+
+```shell
+bin/elasticsearch-reset-password
+[-a, --auto] [-b, --batch] [-E <KeyValuePair>]
+[-f, --force] [-h, --help] [-i, --interactive]
+[-s, --silent] [-u, --username] [--url "<URL>"]
+[-v, --verbose]
+```
+
+
+## Description [_description_14]
+
+Use this command to reset the password of any user in the native realm or any built-in user. By default, the command generates a strong password for the specified user and prints it to the console. To choose the password yourself, run the tool in interactive mode with the `-i` parameter. The command submits its password change request over an HTTP connection to the local {{es}} node; if `xpack.security.http.ssl.enabled` is set to `true`, you must use an HTTPS URL (see the `--url` parameter).
+
+
+## Parameters [reset-password-parameters]
+
+`-a, --auto`
+: Resets the password of the specified user to an auto-generated strong password.
+
+`-b, --batch`
+: Runs the reset password process without prompting the user for verification.
+
+`-E <KeyValuePair>`
+: Configures a standard {{es}} or {{xpack}} setting.
+
+`-f, --force`
+: Forces the command to run against an unhealthy cluster.
+
+`-h, --help`
+: Returns all of the command parameters.
+
+`-i, --interactive`
+: Prompts for the password of the specified user. Use this option to explicitly set a password.
+
+`-s, --silent`
+: Shows minimal output in the console.
+
+`-u, --username`
+: The username of the native realm user or built-in user.
+
+`--url`
+: Specifies the base URL (hostname and port of the local node) that the tool uses to submit API requests to {{es}}. The default value is determined from the settings in your `elasticsearch.yml` file. If `xpack.security.http.ssl.enabled` is set to `true`, you must specify an HTTPS URL.
+
+`-v, --verbose`
+: Shows verbose output in the console.
+ + +## Examples [_examples_19] + +The following example resets the password of the `elastic` user to an auto-generated value and prints the new password in the console: + +```shell +bin/elasticsearch-reset-password -u elastic +``` + +The following example resets the password of a native user with username `user1` after prompting in the terminal for the desired password: + +```shell +bin/elasticsearch-reset-password --username user1 -i +``` + +The following example resets the password of a native user with username `user2` to an auto-generated value prints the new password in the console. The specified URL indicates where the elasticsearch-reset-password tool attempts to reach the local {{es}} node: + +```shell +bin/elasticsearch-reset-password --url "https://172.0.0.3:9200" --username user2 -i +``` + diff --git a/docs/reference/elasticsearch/command-line-tools/saml-metadata.md b/docs/reference/elasticsearch/command-line-tools/saml-metadata.md new file mode 100644 index 0000000000000..86e6d72d6a0bb --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/saml-metadata.md @@ -0,0 +1,117 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/saml-metadata.html +--- + +# elasticsearch-saml-metadata [saml-metadata] + +The `elasticsearch-saml-metadata` command can be used to generate a SAML 2.0 Service Provider Metadata file. + + +## Synopsis [_synopsis_8] + +```shell +bin/elasticsearch-saml-metadata +[--realm ] +[--out ] [--batch] +[--attribute ] [--service-name ] +[--locale ] [--contacts] +([--organisation-name ] [--organisation-display-name ] [--organisation-url ]) +([--signing-bundle ] | [--signing-cert ][--signing-key ]) +[--signing-key-password ] +[-E ] +[-h, --help] ([-s, --silent] | [-v, --verbose]) +``` + + +## Description [_description_15] + +The SAML 2.0 specification provides a mechanism for Service Providers to describe their capabilities and configuration using a *metadata file*. + +The `elasticsearch-saml-metadata` command generates such a file, based on the configuration of a SAML realm in {{es}}. + +Some SAML Identity Providers will allow you to automatically import a metadata file when you configure the Elastic Stack as a Service Provider. + +You can optionally select to digitally sign the metadata file in order to ensure its integrity and authenticity before sharing it with the Identity Provider. The key used for signing the metadata file need not necessarily be the same as the keys already used in the saml realm configuration for SAML message signing. + +If your {{es}} keystore is password protected, you are prompted to enter the password when you run the `elasticsearch-saml-metadata` command. + + +## Parameters [saml-metadata-parameters] + +`--attribute ` +: Specifies a SAML attribute that should be included as a `` element in the metadata. Any attribute configured in the {{es}} realm is automatically included and does not need to be specified as a commandline option. + +`--batch` +: Do not prompt for user input. + +`--contacts` +: Specifies that the metadata should include one or more `` elements. The user will be prompted to enter the details for each person. + +`-E ` +: Configures an {{es}} setting. + +`-h, --help` +: Returns all of the command parameters. + +`--locale ` +: Specifies the locale to use for metadata elements such as ``. Defaults to the JVM’s default system locale. + +`--organisation-display-name ` element. Only valid if `--organisation-name` is also specified. 
+ +`--organisation-name ` +: Specifies that an `` element should be included in the metadata and provides the value for the ``. If this is specified, then `--organisation-url` must also be specified. + +`--organisation-url ` +: Specifies the value of the `` element. This is required if `--organisation-name` is specified. + +`--out ` +: Specifies a path for the output files. Defaults to `saml-elasticsearch-metadata.xml` + +`--service-name ` +: Specifies the value for the `` element in the metadata. Defaults to `elasticsearch`. + +`--signing-bundle ` +: Specifies the path to an existing key pair (in PKCS#12 format). The private key of that key pair will be used to sign the metadata file. + +`--signing-cert ` +: Specifies the path to an existing certificate (in PEM format) to be used for signing of the metadata file. You must also specify the `--signing-key` parameter. This parameter cannot be used with the `--signing-bundle` parameter. + +`--signing-key ` +: Specifies the path to an existing key (in PEM format) to be used for signing of the metadata file. You must also specify the `--signing-cert` parameter. This parameter cannot be used with the `--signing-bundle` parameter. + +`--signing-key-password ` +: Specifies the password for the signing key. It can be used with either the `--signing-key` or the `--signing-bundle` parameters. + +`--realm ` +: Specifies the name of the realm for which the metadata should be generated. This parameter is required if there is more than 1 `saml` realm in your {{es}} configuration. + +`-s, --silent` +: Shows minimal output. + +`-v, --verbose` +: Shows verbose output. + + +## Examples [_examples_20] + +The following command generates a default metadata file for the `saml1` realm: + +```sh +bin/elasticsearch-saml-metadata --realm saml1 +``` + +The file will be written to `saml-elasticsearch-metadata.xml`. You may be prompted to provide the "friendlyName" value for any attributes that are used by the realm. + +The following command generates a metadata file for the `saml2` realm, with a `` of `kibana-finance`, a locale of `en-GB` and includes `` elements and an `` element: + +```sh +bin/elasticsearch-saml-metadata --realm saml2 \ + --service-name kibana-finance \ + --locale en-GB \ + --contacts \ + --organisation-name "Mega Corp. Finance Team" \ + --organisation-url "http://mega.example.com/finance/" +``` + diff --git a/docs/reference/elasticsearch/command-line-tools/service-tokens-command.md b/docs/reference/elasticsearch/command-line-tools/service-tokens-command.md new file mode 100644 index 0000000000000..0bd6b4cc9643f --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/service-tokens-command.md @@ -0,0 +1,139 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/service-tokens-command.html +--- + +# elasticsearch-service-tokens [service-tokens-command] + +Use the `elasticsearch-service-tokens` command to create, list, and delete file-based service account tokens. + + +## Synopsis [_synopsis_9] + +```shell +bin/elasticsearch-service-tokens +([create ]) | +([list] []) | +([delete ]) +``` + + +## Description [_description_16] + +::::{note} +The recommended way to manage [service tokens](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/service-accounts.md#service-accounts-tokens) is via the [Create service account tokens](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-security-create-service-token) API. 
File based tokens are intended for use with orchestrators such as [{{ece}}](docs-content://deploy-manage/deploy/cloud-enterprise.md) and [{{eck}}](docs-content://deploy-manage/deploy/cloud-on-k8s.md) +:::: + + +This command creates a `service_tokens` file in the `$ES_HOME/config` directory when you create the first service account token. This file does not exist by default. {{es}} monitors this file for changes and dynamically reloads it. + +This command only makes changes to the `service_tokens` file on the local node. If the service token will be used to authenticate requests against multiple nodes in the cluster then you must copy the `service_tokens` file to each node. + +See [service accounts](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/service-accounts.md) for further information about the behaviour of service accounts and the management of service tokens. + +::::{important} +To ensure that {{es}} can read the service account token information at startup, run `elasticsearch-service-tokens` as the same user you use to run {{es}}. Running this command as `root` or some other user updates the permissions for the `service_tokens` file and prevents {{es}} from accessing it. +:::: + + + +## Parameters [service-tokens-command-parameters] + +`create` +: Creates a service account token for the specified service account. + + ::::{dropdown} Properties of `create` + `` + : (Required, string) Service account principal that takes the format of `/`, where the `namespace` is a top-level grouping of service accounts, and `service` is the name of the service. For example, `elastic/fleet-server`. + + The service account principal must match a known service account. + + + `` + : (Required, string) An identifier for the token name. + + Token names must be at least 1 and no more than 256 characters. They can contain alphanumeric characters (`a-z`, `A-Z`, `0-9`), dashes (`-`), and underscores (`_`), but cannot begin with an underscore. + + ::::{note} + Token names must be unique in the context of the associated service account. + :::: + + + :::: + + +`list` +: Lists all service account tokens defined in the `service_tokens` file. If you specify a service account principal, the command lists only the tokens that belong to the specified service account. + + ::::{dropdown} Properties of `list` + `` + : (Optional, string) Service account principal that takes the format of `/`, where the `namespace` is a top-level grouping of service accounts, and `service` is the name of the service. For example, `elastic/fleet-server`. + + The service account principal must match a known service account. + + + :::: + + +`delete` +: Deletes a service account token for the specified service account. + + ::::{dropdown} Properties of `delete` + `` + : (Required, string) Service account principal that takes the format of `/`, where the `namespace` is a top-level grouping of service accounts, and `service` is the name of the service. For example, `elastic/fleet-server`. + + The service account principal must match a known service account. + + + :::: + + + `` + : (Required, string) Name of an existing token. + + + +## Examples [_examples_21] + +The following command creates a service account token named `my-token` for the `elastic/fleet-server` service account. + +```shell +bin/elasticsearch-service-tokens create elastic/fleet-server my-token +``` + +The output is a bearer token, which is a Base64 encoded string. 
+ +```shell +SERVICE_TOKEN elastic/fleet-server/my-token = AAEAAWVsYXN0aWM...vZmxlZXQtc2VydmVyL3Rva2VuMTo3TFdaSDZ +``` + +Use this bearer token to authenticate with your {{es}} cluster. + +```shell +curl -H "Authorization: Bearer AAEAAWVsYXN0aWM...vZmxlZXQtc2VydmVyL3Rva2VuMTo3TFdaSDZ" http://localhost:9200/_cluster/health +``` + +::::{note} +If your node has `xpack.security.http.ssl.enabled` set to `true`, then you must specify `https` in the request URL. +:::: + + +The following command lists all service account tokens that are defined in the `service_tokens` file. + +```shell +bin/elasticsearch-service-tokens list +``` + +A list of all service account tokens displays in your terminal: + +```txt +elastic/fleet-server/my-token +elastic/fleet-server/another-token +``` + +The following command deletes the `my-token` service account token for the `elastic/fleet-server` service account: + +```shell +bin/elasticsearch-service-tokens delete elastic/fleet-server my-token +``` + diff --git a/docs/reference/elasticsearch/command-line-tools/setup-passwords.md b/docs/reference/elasticsearch/command-line-tools/setup-passwords.md new file mode 100644 index 0000000000000..b14798c651267 --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/setup-passwords.md @@ -0,0 +1,68 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/setup-passwords.html +--- + +# elasticsearch-setup-passwords [setup-passwords] + +::::{admonition} Deprecated in 8.0. +:class: warning + +The `elasticsearch-setup-passwords` tool is deprecated and will be removed in a future release. To manually reset the password for the built-in users (including the `elastic` user), use the [`elasticsearch-reset-password`](/reference/elasticsearch/command-line-tools/reset-password.md) tool, the {{es}} change password API, or the User Management features in {{kib}}. +:::: + + +The `elasticsearch-setup-passwords` command sets the passwords for the [built-in users](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/built-in-users.md). + + +## Synopsis [_synopsis_10] + +```shell +bin/elasticsearch-setup-passwords auto|interactive +[-b, --batch] [-h, --help] [-E ] +[-s, --silent] [-u, --url ""] [-v, --verbose] +``` + + +## Description [_description_17] + +This command is intended for use only during the initial configuration of the {{es}} {{security-features}}. It uses the [`elastic` bootstrap password](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/built-in-users.md#bootstrap-elastic-passwords) to run user management API requests. If your {{es}} keystore is password protected, before you can set the passwords for the built-in users, you must enter the keystore password. After you set a password for the `elastic` user, the bootstrap password is no longer active and you cannot use this command. Instead, you can change passwords by using the **Management > Users** UI in {{kib}} or the [Change Password API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-security-change-password). + +This command uses an HTTP connection to connect to the cluster and run the user management requests. If your cluster uses TLS/SSL on the HTTP layer, the command automatically attempts to establish the connection by using the HTTPS protocol. It configures the connection by using the `xpack.security.http.ssl` settings in the `elasticsearch.yml` file. 
If you do not use the default config directory location, ensure that the **ES_PATH_CONF** environment variable returns the correct path before you run the `elasticsearch-setup-passwords` command. You can override settings in your `elasticsearch.yml` file by using the `-E` command option. For more information about debugging connection failures, see [Setup-passwords command fails due to connection failure](docs-content://troubleshoot/elasticsearch/security/trb-security-setup.md). + + +## Parameters [setup-passwords-parameters] + +`auto` +: Outputs randomly-generated passwords to the console. + +`-b, --batch` +: If enabled, runs the change password process without prompting the user. + +`-E ` +: Configures a standard {{es}} or {{xpack}} setting. + +`-h, --help` +: Shows help information. + +`interactive` +: Prompts you to manually enter passwords. + +`-s, --silent` +: Shows minimal output. + +`-u, --url ""` +: Specifies the URL that the tool uses to submit the user management API requests. The default value is determined from the settings in your `elasticsearch.yml` file. If `xpack.security.http.ssl.enabled` is set to `true`, you must specify an HTTPS URL. + +`-v, --verbose` +: Shows verbose output. + + +## Examples [_examples_22] + +The following example uses the `-u` parameter to tell the tool where to submit its user management API requests: + +```shell +bin/elasticsearch-setup-passwords auto -u "http://localhost:9201" +``` + diff --git a/docs/reference/elasticsearch/command-line-tools/shard-tool.md b/docs/reference/elasticsearch/command-line-tools/shard-tool.md new file mode 100644 index 0000000000000..c7d11ac472362 --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/shard-tool.md @@ -0,0 +1,124 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/shard-tool.html +--- + +# elasticsearch-shard [shard-tool] + +In some cases the Lucene index or translog of a shard copy can become corrupted. The `elasticsearch-shard` command enables you to remove corrupted parts of the shard if a good copy of the shard cannot be recovered automatically or restored from backup. + +::::{warning} +You will lose the corrupted data when you run `elasticsearch-shard`. This tool should only be used as a last resort if there is no way to recover from another copy of the shard or restore a snapshot. +:::: + + + +## Synopsis [_synopsis_11] + +```shell +bin/elasticsearch-shard remove-corrupted-data + ([--index ] [--shard-id ] | [--dir ]) + [--truncate-clean-translog] + [-E ] + [-h, --help] ([-s, --silent] | [-v, --verbose]) +``` + + +## Description [_description_18] + +When {{es}} detects that a shard’s data is corrupted, it fails that shard copy and refuses to use it. Under normal conditions, the shard is automatically recovered from another copy. If no good copy of the shard is available and you cannot restore one from a snapshot, you can use `elasticsearch-shard` to remove the corrupted data and restore access to any remaining data in unaffected segments. + +::::{warning} +Stop Elasticsearch before running `elasticsearch-shard`. +:::: + + +To remove corrupted shard data use the `remove-corrupted-data` subcommand. + +There are two ways to specify the path: + +* Specify the index name and shard name with the `--index` and `--shard-id` options. +* Use the `--dir` option to specify the full path to the corrupted index or translog files. + +$$$cli-tool-jvm-options-shard$$$ + + +### JVM options [_jvm_options_3] + +CLI tools run with 64MB of heap. 
For most tools, this value is fine. However, if needed this can be overridden by setting the `CLI_JAVA_OPTS` environment variable. For example, the following increases the heap size used by the `elasticsearch-shard` tool to 1GB. + +```shell +export CLI_JAVA_OPTS="-Xmx1g" +bin/elasticsearch-shard ... +``` + + +### Removing corrupted data [_removing_corrupted_data] + +`elasticsearch-shard` analyses the shard copy and provides an overview of the corruption found. To proceed you must then confirm that you want to remove the corrupted data. + +::::{warning} +Back up your data before running `elasticsearch-shard`. This is a destructive operation that removes corrupted data from the shard. +:::: + + +```txt +$ bin/elasticsearch-shard remove-corrupted-data --index my-index-000001 --shard-id 0 + + + WARNING: Elasticsearch MUST be stopped before running this tool. + + Please make a complete backup of your index before using this tool. + + +Opening Lucene index at /var/lib/elasticsearchdata/indices/P45vf_YQRhqjfwLMUvSqDw/0/index/ + + >> Lucene index is corrupted at /var/lib/elasticsearchdata/indices/P45vf_YQRhqjfwLMUvSqDw/0/index/ + +Opening translog at /var/lib/elasticsearchdata/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/ + + + >> Translog is clean at /var/lib/elasticsearchdata/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/ + + + Corrupted Lucene index segments found - 32 documents will be lost. + + WARNING: YOU WILL LOSE DATA. + +Continue and remove docs from the index ? Y + +WARNING: 1 broken segments (containing 32 documents) detected +Took 0.056 sec total. +Writing... +OK +Wrote new segments file "segments_c" +Marking index with the new history uuid : 0pIBd9VTSOeMfzYT6p0AsA +Changing allocation id V8QXk-QXSZinZMT-NvEq4w to tjm9Ve6uTBewVFAlfUMWjA + +You should run the following command to allocate this shard: + +POST /_cluster/reroute +{ + "commands" : [ + { + "allocate_stale_primary" : { + "index" : "index42", + "shard" : 0, + "node" : "II47uXW2QvqzHBnMcl2o_Q", + "accept_data_loss" : false + } + } + ] +} + +You must accept the possibility of data loss by changing the `accept_data_loss` parameter to `true`. + +Deleted corrupt marker corrupted_FzTSBSuxT7i3Tls_TgwEag from /var/lib/elasticsearchdata/indices/P45vf_YQRhqjfwLMUvSqDw/0/index/ +``` + +When you use `elasticsearch-shard` to drop the corrupted data, the shard’s allocation ID changes. After restarting the node, you must use the [cluster reroute API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-reroute) to tell Elasticsearch to use the new ID. The `elasticsearch-shard` command shows the request that you need to submit. + +You can also use the `-h` option to get a list of all options and parameters that the `elasticsearch-shard` tool supports. + +Finally, you can use the `--truncate-clean-translog` option to truncate the shard’s translog even if it does not appear to be corrupt. + diff --git a/docs/reference/elasticsearch/command-line-tools/syskeygen.md b/docs/reference/elasticsearch/command-line-tools/syskeygen.md new file mode 100644 index 0000000000000..b0b8f2fbe00f3 --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/syskeygen.md @@ -0,0 +1,52 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/syskeygen.html +--- + +# elasticsearch-syskeygen [syskeygen] + +The `elasticsearch-syskeygen` command creates a system key file in the elasticsearch config directory. 
+ + +## Synopsis [_synopsis_12] + +```shell +bin/elasticsearch-syskeygen +[-E ] [-h, --help] +([-s, --silent] | [-v, --verbose]) +``` + + +## Description [_description_19] + +The command generates a `system_key` file, which you can use to symmetrically encrypt sensitive data. For example, you can use this key to prevent {{watcher}} from returning and storing information that contains clear text credentials. See [*Encrypting sensitive data in {{watcher}}*](docs-content://explore-analyze/alerts-cases/watcher/encrypting-data.md). + +::::{important} +The system key is a symmetric key, so the same key must be used on every node in the cluster. +:::: + + + +## Parameters [syskeygen-parameters] + +`-E ` +: Configures a setting. For example, if you have a custom installation of {{es}}, you can use this parameter to specify the `ES_PATH_CONF` environment variable. + +`-h, --help` +: Returns all of the command parameters. + +`-s, --silent` +: Shows minimal output. + +`-v, --verbose` +: Shows verbose output. + + +## Examples [_examples_23] + +The following command generates a `system_key` file in the default `$ES_HOME/config` directory: + +```sh +bin/elasticsearch-syskeygen +``` + diff --git a/docs/reference/elasticsearch/command-line-tools/users-command.md b/docs/reference/elasticsearch/command-line-tools/users-command.md new file mode 100644 index 0000000000000..0bb8a1849f268 --- /dev/null +++ b/docs/reference/elasticsearch/command-line-tools/users-command.md @@ -0,0 +1,111 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/users-command.html +--- + +# elasticsearch-users [users-command] + +If you use file-based user authentication, the `elasticsearch-users` command enables you to add and remove users, assign user roles, and manage passwords per node. + + +## Synopsis [_synopsis_13] + +```shell +bin/elasticsearch-users +([useradd ] [-p ] [-r ]) | +([list] ) | +([passwd ] [-p ]) | +([roles ] [-a ] [-r ]) | +([userdel ]) +``` + + +## Description [_description_20] + +If you use the built-in `file` internal realm, users are defined in local files on each node in the cluster. + +Usernames and roles must be at least 1 and no more than 1024 characters. They can contain alphanumeric characters (`a-z`, `A-Z`, `0-9`), spaces, punctuation, and printable symbols in the [Basic Latin (ASCII) block](https://en.wikipedia.org/wiki/Basic_Latin_(Unicode_block)). Leading or trailing whitespace is not allowed. + +Passwords must be at least 6 characters long. + +For more information, see [File-based user authentication](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/file-based.md). + +::::{tip} +To ensure that {{es}} can read the user and role information at startup, run `elasticsearch-users useradd` as the same user you use to run {{es}}. Running the command as root or some other user updates the permissions for the `users` and `users_roles` files and prevents {{es}} from accessing them. +:::: + + + +## Parameters [users-command-parameters] + +`-a ` +: If used with the `roles` parameter, adds a comma-separated list of roles to a user. + +`list` +: List the users that are registered with the `file` realm on the local node. If you also specify a user name, the command provides information for that user. + +`-p ` +: Specifies the user’s password. If you do not specify this parameter, the command prompts you for the password. + + ::::{tip} + Omit the `-p` option to keep plaintext passwords out of the terminal session’s command history. 
+ :::: + + +`passwd ` +: Resets a user’s password. You can specify the new password directly with the `-p` parameter. + +`-r ` +: * If used with the `useradd` parameter, defines a user’s roles. This option accepts a comma-separated list of role names to assign to the user. +* If used with the `roles` parameter, removes a comma-separated list of roles from a user. + + +`roles` +: Manages the roles of a particular user. You can combine adding and removing roles within the same command to change a user’s roles. + +`useradd ` +: Adds a user to your local node. + +`userdel ` +: Deletes a user from your local node. + + +## Examples [_examples_24] + +The following example adds a new user named `jacknich` to the `file` realm. The password for this user is `theshining`, and this user is associated with the `network` and `monitoring` roles. + +```shell +bin/elasticsearch-users useradd jacknich -p theshining -r network,monitoring +``` + +The following example lists the users that are registered with the `file` realm on the local node: + +```shell +bin/elasticsearch-users list +rdeniro : admin +alpacino : power_user +jacknich : monitoring,network +``` + +Users are in the left-hand column and their corresponding roles are listed in the right-hand column. + +The following example resets the `jacknich` user’s password: + +```shell +bin/elasticsearch-users passwd jachnich +``` + +Since the `-p` parameter was omitted, the command prompts you to enter and confirm a password in interactive mode. + +The following example removes the `network` and `monitoring` roles from the `jacknich` user and adds the `user` role: + +```shell +bin/elasticsearch-users roles jacknich -r network,monitoring -a user +``` + +The following example deletes the `jacknich` user: + +```shell +bin/elasticsearch-users userdel jacknich +``` + diff --git a/docs/reference/elasticsearch/configuration-reference/auding-settings.md b/docs/reference/elasticsearch/configuration-reference/auding-settings.md new file mode 100644 index 0000000000000..d4300e6fd4657 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/auding-settings.md @@ -0,0 +1,109 @@ +--- +navigation_title: "Auditing settings" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/auditing-settings.html +--- + +# Auditing security settings [auditing-settings] + + +$$$auditing-settings-description$$$ +You can use [audit logging](docs-content://deploy-manage/monitor/logging-configuration/enabling-audit-logs.md) to record security-related events, such as authentication failures, refused connections, and data-access events. In addition, changes via the APIs to the security configuration, such as creating, updating and removing [native](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/native.md) and [built-in](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/built-in-users.md) users, [roles](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-security-put-role), [role mappings](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-security-put-role-mapping) and [API keys](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-security-create-api-key) are also recorded. + +::::{tip} +Audit logs are only available on certain subscription levels. For more information, see [{{stack}} subscriptions](https://www.elastic.co/subscriptions). +:::: + +If configured, auditing settings must be set on every node in the cluster. 
Static settings, such as `xpack.security.audit.enabled`, must be configured in `elasticsearch.yml` on each node. For dynamic auditing settings, use the [cluster update settings API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings) to ensure the setting is the same on all nodes. + +## General Auditing Settings [general-audit-settings] + +$$$xpack-security-audit-enabled$$$ + +`xpack.security.audit.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set to `true` to enable auditing on the node. The default value is `false`. This puts the auditing events in a dedicated file named `_audit.json` on each node. + + If enabled, this setting must be configured in `elasticsearch.yml` on all nodes in the cluster. + + + +## Audited Event Settings [event-audit-settings] + +The events and some other information about what gets logged can be controlled by using the following settings: + +$$$xpack-sa-lf-events-include$$$ + +`xpack.security.audit.logfile.events.include` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Specifies the [kind of events](/reference/elasticsearch/elasticsearch-audit-events.md) to print in the auditing output. In addition, `_all` can be used to exhaustively audit all the events, but this is usually discouraged since it will get very verbose. The default list value contains: `access_denied, access_granted, anonymous_access_denied, authentication_failed, connection_denied, tampered_request, run_as_denied, run_as_granted, security_config_change`. + +$$$xpack-sa-lf-events-exclude$$$ + +`xpack.security.audit.logfile.events.exclude` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Excludes the specified [kind of events](/reference/elasticsearch/elasticsearch-audit-events.md) from the include list. This is useful in the case where the `events.include` setting contains the special value `_all`. The default is the empty list. + +$$$xpack-sa-lf-events-emit-request$$$ + +`xpack.security.audit.logfile.events.emit_request_body` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Specifies whether to include the full request body from REST requests as an attribute of certain kinds of audit events. This setting can be used to [audit search queries](docs-content://deploy-manage/monitor/logging-configuration/auditing-search-queries.md). + + The default value is `false`, so request bodies are not printed. + + ::::{important} + Be advised that sensitive data may be audited in plain text when including the request body in audit events, even though all the security APIs, such as those that change the user’s password, have the credentials filtered out when audited. + :::: + + + +## Local Node Info Settings [node-audit-settings] + +$$$xpack-sa-lf-emit-node-name$$$ + +`xpack.security.audit.logfile.emit_node_name` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Specifies whether to include the [node name](docs-content://deploy-manage/deploy/self-managed/important-settings-configuration.md#node-name) as a field in each audit event. The default value is `false`. 
+ +$$$xpack-sa-lf-emit-node-host-address$$$ + +`xpack.security.audit.logfile.emit_node_host_address` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Specifies whether to include the node’s IP address as a field in each audit event. The default value is `false`. + +$$$xpack-sa-lf-emit-node-host-name$$$ + +`xpack.security.audit.logfile.emit_node_host_name` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Specifies whether to include the node’s host name as a field in each audit event. The default value is `false`. + +$$$xpack-sa-lf-emit-node-id$$$ + +`xpack.security.audit.logfile.emit_node_id` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Specifies whether to include the node id as a field in each audit event. Unlike [node name](docs-content://deploy-manage/deploy/self-managed/important-settings-configuration.md#node-name), whose value might change if the administrator changes the setting in the config file, the node id will persist across cluster restarts and the administrator cannot change it. The default value is `true`. + + +## Audit Logfile Event Ignore Policies [audit-event-ignore-policies] + +The following settings affect the [ignore policies](docs-content://deploy-manage/monitor/logging-configuration/logfile-audit-events-ignore-policies.md) that enable fine-grained control over which audit events are printed to the log file. All of the settings with the same policy name combine to form a single policy. If an event matches all the conditions of any policy, it is ignored and not printed. Most audit events are subject to the ignore policies. The sole exception are events of the `security_config_change` type, which cannot be filtered out, unless [excluded](#xpack-sa-lf-events-exclude) altogether. + +$$$xpack-sa-lf-events-ignore-users$$$ + +`xpack.security.audit.logfile.events.ignore_filters..users` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A list of user names or wildcards. The specified policy will not print audit events for users matching these values. + +$$$xpack-sa-lf-events-ignore-realms$$$ + +`xpack.security.audit.logfile.events.ignore_filters..realms` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A list of authentication realm names or wildcards. The specified policy will not print audit events for users in these realms. + +$$$xpack-sa-lf-events-ignore-actions$$$ + +`xpack.security.audit.logfile.events.ignore_filters..actions` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A list of action names or wildcards. Action name can be found in the `action` field of the audit event. The specified policy will not print audit events for actions matching these values. + +$$$xpack-sa-lf-events-ignore-roles$$$ + +`xpack.security.audit.logfile.events.ignore_filters..roles` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A list of role names or wildcards. The specified policy will not print audit events for users that have these roles. If the user has several roles, some of which are **not** covered by the policy, the policy will **not** cover this event. 
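+
+Each ignore policy is identified by a policy name that forms part of the setting key, and all `ignore_filters` settings that share that name combine into a single policy. As an illustrative sketch (the policy name `example1` and the `beats_admin` role are examples, not recommendations), a dynamic update that suppresses audit events for users holding that role might look like this:
+
+```console
+PUT _cluster/settings
+{
+  "persistent": {
+    "xpack.security.audit.logfile.events.ignore_filters.example1.roles": ["beats_admin"]
+  }
+}
+```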
+ +$$$xpack-sa-lf-events-ignore-indices$$$ + +`xpack.security.audit.logfile.events.ignore_filters..indices` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A list of index names or wildcards. The specified policy will not print audit events when all the indices in the event match these values. If the event concerns several indices, some of which are **not** covered by the policy, the policy will **not** cover this event. + + diff --git a/docs/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md b/docs/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md new file mode 100644 index 0000000000000..7c5a593d178de --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md @@ -0,0 +1,151 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/circuit-breaker.html +--- + +# Circuit breaker settings [circuit-breaker] + +$$$circuit-breaker-description$$$ +{{es}} contains multiple circuit breakers used to prevent operations from using an excessive amount of memory. Each breaker tracks the memory used by certain operations and specifies a limit for how much memory it may track. Additionally, there is a parent-level breaker that specifies the total amount of memory that may be tracked across all breakers. + +When a circuit breaker reaches its limit, {{es}} will reject further operations. See [Circuit breaker errors](docs-content://troubleshoot/elasticsearch/circuit-breaker-errors.md) for information about errors raised by circuit breakers. + +Circuit breakers do not track all memory usage in {{es}} and therefore provide only incomplete protection against excessive memory usage. If {{es}} uses too much memory then it may suffer from performance issues and nodes may even fail with an `OutOfMemoryError`. See [High JVM memory pressure](docs-content://troubleshoot/elasticsearch/high-jvm-memory-pressure.md) for help with troubleshooting high heap usage. + +Except where noted otherwise, these settings can be dynamically updated on a live cluster with the [cluster-update-settings](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings) API. + +For information about circuit breaker errors, see [Circuit breaker errors](docs-content://troubleshoot/elasticsearch/circuit-breaker-errors.md). + + +### Parent circuit breaker [parent-circuit-breaker] + +The parent-level breaker can be configured with the following settings: + +`indices.breaker.total.use_real_memory` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Determines whether the parent breaker should take real memory usage into account (`true`) or only consider the amount that is reserved by child circuit breakers (`false`). Defaults to `true`. + +$$$indices-breaker-total-limit$$$ + +`indices.breaker.total.limit` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Starting limit for overall parent breaker. Defaults to 70% of JVM heap if `indices.breaker.total.use_real_memory` is `false`. If `indices.breaker.total.use_real_memory` is `true`, defaults to 95% of the JVM heap. 
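+
+Because `indices.breaker.total.limit` is a dynamic setting, it can be adjusted on a live cluster through the cluster update settings API. The following request is an illustrative sketch; the `80%` value is an example, not a recommendation:
+
+```console
+PUT _cluster/settings
+{
+  "persistent": {
+    "indices.breaker.total.limit": "80%"
+  }
+}
+```
+
+Setting the value back to `null` removes the override and restores the default.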
+ + +### Field data circuit breaker [fielddata-circuit-breaker] + +The field data circuit breaker estimates the heap memory required to load a field into the [field data cache](/reference/elasticsearch/configuration-reference/field-data-cache-settings.md). If loading the field would cause the cache to exceed a predefined memory limit, the circuit breaker stops the operation and returns an error. + +$$$fielddata-circuit-breaker-limit$$$ + +`indices.breaker.fielddata.limit` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Limit for fielddata breaker. Defaults to 40% of JVM heap. + +$$$fielddata-circuit-breaker-overhead$$$ + +`indices.breaker.fielddata.overhead` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A constant that all field data estimations are multiplied with to determine a final estimation. Defaults to `1.03`. + + +### Request circuit breaker [request-circuit-breaker] + +The request circuit breaker allows Elasticsearch to prevent per-request data structures (for example, memory used for calculating aggregations during a request) from exceeding a certain amount of memory. + +$$$request-breaker-limit$$$ + +`indices.breaker.request.limit` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Limit for request breaker, defaults to 60% of JVM heap. + +$$$request-breaker-overhead$$$ + +`indices.breaker.request.overhead` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A constant that all request estimations are multiplied with to determine a final estimation. Defaults to `1`. + + +### In flight requests circuit breaker [in-flight-circuit-breaker] + +The in flight requests circuit breaker allows Elasticsearch to limit the memory usage of all currently active incoming requests on transport or HTTP level from exceeding a certain amount of memory on a node. The memory usage is based on the content length of the request itself. This circuit breaker also considers that memory is not only needed for representing the raw request but also as a structured object which is reflected by default overhead. + +`network.breaker.inflight_requests.limit` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Limit for in flight requests breaker, defaults to 100% of JVM heap. This means that it is bound by the limit configured for the parent circuit breaker. + +`network.breaker.inflight_requests.overhead` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A constant that all in flight requests estimations are multiplied with to determine a final estimation. Defaults to 2. + + +### Script compilation circuit breaker [script-compilation-circuit-breaker] + +Slightly different than the previous memory-based circuit breaker, the script compilation circuit breaker limits the number of inline script compilations within a period of time. 
+ +See the "prefer-parameters" section of the [scripting](docs-content://explore-analyze/scripting/modules-scripting-using.md) documentation for more information. + +`script.max_compilations_rate` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Limit for the number of unique dynamic scripts within a certain interval that are allowed to be compiled. Defaults to `150/5m`, meaning 150 every 5 minutes. + +If the cluster regularly hits the given `max_compilation_rate`, it’s possible the script cache is undersized, use [Nodes Stats](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-nodes-stats) to inspect the number of recent cache evictions, `script.cache_evictions_history` and compilations `script.compilations_history`. If there are a large number of recent cache evictions or compilations, the script cache may be undersized, consider doubling the size of the script cache via the setting `script.cache.max_size`. + + +### Regex circuit breaker [regex-circuit-breaker] + +Poorly written regular expressions can degrade cluster stability and performance. The regex circuit breaker limits the use and complexity of [regex in Painless scripts](/reference/scripting-languages/painless/painless-regexes.md). + +$$$script-painless-regex-enabled$$$ + +`script.painless.regex.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Enables regex in Painless scripts. Accepts: + + `limited` (Default) + : Enables regex but limits complexity using the [`script.painless.regex.limit-factor`](#script-painless-regex-limit-factor) cluster setting. + + `true` + : Enables regex with no complexity limits. Disables the regex circuit breaker. + + `false` + : Disables regex. Any Painless script containing a regular expression returns an error. + + +$$$script-painless-regex-limit-factor$$$ + +`script.painless.regex.limit-factor` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Limits the number of characters a regular expression in a Painless script can consider. {{es}} calculates this limit by multiplying the setting value by the script input’s character length. + + For example, the input `foobarbaz` has a character length of `9`. If `script.painless.regex.limit-factor` is `6`, a regular expression on `foobarbaz` can consider up to 54 (9 * 6) characters. If the expression exceeds this limit, it triggers the regex circuit breaker and returns an error. + + {{es}} only applies this limit if [`script.painless.regex.enabled`](#script-painless-regex-enabled) is `limited`. + + + +## EQL circuit breaker [circuit-breakers-page-eql] + +When a [sequence](/reference/query-languages/eql-syntax.md#eql-sequences) query is executed, the node handling the query needs to keep some structures in memory, which are needed by the algorithm implementing the sequence matching. When large amounts of data need to be processed, and/or a large amount of matched sequences is requested by the user (by setting the [size](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-eql-search) query param), the memory occupied by those structures could potentially exceed the available memory of the JVM. This would cause an `OutOfMemory` exception which would bring down the node. 
+ +To prevent this from happening, a special circuit breaker is used, which limits the memory allocation during the execution of a [sequence](/reference/query-languages/eql-syntax.md#eql-sequences) query. When the breaker is triggered, an `org.elasticsearch.common.breaker.CircuitBreakingException` is thrown and a descriptive error message including `circuit_breaking_exception` is returned to the user. + +This circuit breaker can be configured using the following settings: + +`breaker.eql_sequence.limit` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The limit for circuit breaker used to restrict the memory utilisation during the execution of an EQL sequence query. This value is defined as a percentage of the JVM heap. Defaults to `50%`. If the [parent circuit breaker](#parent-circuit-breaker) is set to a value less than `50%`, this setting uses that value as its default instead. + +`breaker.eql_sequence.overhead` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) A constant that sequence query memory estimates are multiplied by to determine a final estimate. Defaults to `1`. + +`breaker.eql_sequence.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Circuit breaker type. Valid values are: + + `memory` (Default) + : The breaker limits memory usage for EQL sequence queries. + + `noop` + : Disables the breaker. + + + +### {{ml-cap}} circuit breaker [circuit-breakers-page-model-inference] + +`breaker.model_inference.limit` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The limit for the trained model circuit breaker. This value is defined as a percentage of the JVM heap. Defaults to `50%`. If the [parent circuit breaker](#parent-circuit-breaker) is set to a value less than `50%`, this setting uses that value as its default instead. + +`breaker.model_inference.overhead` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) A constant that all trained model estimations are multiplied by to determine a final estimation. Defaults to `1`. + +`breaker.model_inference.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The underlying type of the circuit breaker. There are two valid options: `noop` and `memory`. `noop` means the circuit breaker does nothing to prevent too much memory usage. `memory` means the circuit breaker tracks the memory used by trained models and can potentially break and prevent `OutOfMemory` errors. The default value is `memory`. + diff --git a/docs/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings.md b/docs/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings.md new file mode 100644 index 0000000000000..3ff51e090548b --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings.md @@ -0,0 +1,302 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-cluster.html +--- + +# Cluster-level shard allocation and routing settings [modules-cluster] + +Shard allocation is the process of assigning shard copies to nodes. 
This can happen during initial recovery, replica allocation, rebalancing, when nodes are added to or removed from the cluster, or when cluster or index settings that impact allocation are updated. + +One of the main roles of the master is to decide which shards to allocate to which nodes, and when to move shards between nodes in order to rebalance the cluster. + +There are a number of settings available to control the shard allocation process: + +* [Cluster-level shard allocation settings](#cluster-shard-allocation-settings) control allocation and rebalancing operations. +* [Disk-based shard allocation settings](#disk-based-shard-allocation) explains how Elasticsearch takes available disk space into account, and the related settings. +* [Shard allocation awareness](docs-content://deploy-manage/distributed-architecture/shard-allocation-relocation-recovery/shard-allocation-awareness.md) and [Forced awareness](docs-content://deploy-manage/distributed-architecture/shard-allocation-relocation-recovery/shard-allocation-awareness.md#forced-awareness) control how shards can be distributed across different racks or availability zones. +* [Cluster-level shard allocation filtering](#cluster-shard-allocation-filtering) allows certain nodes or groups of nodes excluded from allocation so that they can be decommissioned. + +Besides these, there are a few other [miscellaneous cluster-level settings](/reference/elasticsearch/configuration-reference/miscellaneous-cluster-settings.md). + +## Cluster-level shard allocation settings [cluster-shard-allocation-settings] + +You can use the following settings to control shard allocation and recovery: + +$$$cluster-routing-allocation-enable$$$ + +`cluster.routing.allocation.enable` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Enable or disable allocation for specific kinds of shards: + +* `all` - (default) Allows shard allocation for all kinds of shards. +* `primaries` - Allows shard allocation only for primary shards. +* `new_primaries` - Allows shard allocation only for primary shards for new indices. +* `none` - No shard allocations of any kind are allowed for any indices. + +This setting only affects future allocations, and does not re-allocate or un-allocate currently allocated shards. It also does not affect the recovery of local primary shards when restarting a node. A restarted node that has a copy of an unassigned primary shard will recover that primary immediately, assuming that its allocation id matches one of the active allocation ids in the cluster state. + + +$$$cluster-routing-allocation-same-shard-host$$$ + +`cluster.routing.allocation.same_shard.host` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) If `true`, forbids multiple copies of a shard from being allocated to distinct nodes on the same host, i.e. which have the same network address. Defaults to `false`, meaning that copies of a shard may sometimes be allocated to nodes on the same host. This setting is only relevant if you run multiple nodes on each host. + +`cluster.routing.allocation.node_concurrent_incoming_recoveries` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) How many concurrent incoming shard recoveries are allowed to happen on a node. Incoming recoveries are the recoveries where the target shard (most likely the replica unless a shard is relocating) is allocated on the node. 
Defaults to `2`. Increasing this setting may cause shard movements to have a performance impact on other activity in your cluster, but may not make shard movements complete noticeably sooner. We do not recommend adjusting this setting from its default of `2`. + +`cluster.routing.allocation.node_concurrent_outgoing_recoveries` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) How many concurrent outgoing shard recoveries are allowed to happen on a node. Outgoing recoveries are the recoveries where the source shard (most likely the primary unless a shard is relocating) is allocated on the node. Defaults to `2`. Increasing this setting may cause shard movements to have a performance impact on other activity in your cluster, but may not make shard movements complete noticeably sooner. We do not recommend adjusting this setting from its default of `2`. + +`cluster.routing.allocation.node_concurrent_recoveries` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A shortcut to set both `cluster.routing.allocation.node_concurrent_incoming_recoveries` and `cluster.routing.allocation.node_concurrent_outgoing_recoveries`. The value of this setting takes effect only when the more specific setting is not configured. Defaults to `2`. Increasing this setting may cause shard movements to have a performance impact on other activity in your cluster, but may not make shard movements complete noticeably sooner. We do not recommend adjusting this setting from its default of `2`. + +`cluster.routing.allocation.node_initial_primaries_recoveries` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) While the recovery of replicas happens over the network, the recovery of an unassigned primary after node restart uses data from the local disk. These should be fast so more initial primary recoveries can happen in parallel on each node. Defaults to `4`. Increasing this setting may cause shard recoveries to have a performance impact on other activity in your cluster, but may not make shard recoveries complete noticeably sooner. We do not recommend adjusting this setting from its default of `4`. + + +## Shard rebalancing settings [shards-rebalancing-settings] + +A cluster is *balanced* when it has an equal number of shards on each node, with all nodes needing equal resources, without having a concentration of shards from any index on any node. {{es}} runs an automatic process called *rebalancing* which moves shards between the nodes in your cluster to improve its balance. Rebalancing obeys all other shard allocation rules such as [allocation filtering](#cluster-shard-allocation-filtering) and [forced awareness](docs-content://deploy-manage/distributed-architecture/shard-allocation-relocation-recovery/shard-allocation-awareness.md#forced-awareness) which may prevent it from completely balancing the cluster. In that case, rebalancing strives to achieve the most balanced cluster possible within the rules you have configured. If you are using [data tiers](docs-content://manage-data/lifecycle/data-tiers.md) then {{es}} automatically applies allocation filtering rules to place each shard within the appropriate tier. These rules mean that the balancer works independently within each tier. 
+ +You can use the following settings to control the rebalancing of shards across the cluster: + +`cluster.routing.allocation.allow_rebalance` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Specify when shard rebalancing is allowed: + +* `always` - (default) Always allow rebalancing. +* `indices_primaries_active` - Only when all primaries in the cluster are allocated. +* `indices_all_active` - Only when all shards (primaries and replicas) in the cluster are allocated. + + +`cluster.routing.rebalance.enable` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Enable or disable rebalancing for specific kinds of shards: + +* `all` - (default) Allows shard balancing for all kinds of shards. +* `primaries` - Allows shard balancing only for primary shards. +* `replicas` - Allows shard balancing only for replica shards. +* `none` - No shard balancing of any kind are allowed for any indices. + +Rebalancing is important to ensure the cluster returns to a healthy and fully resilient state after a disruption. If you adjust this setting, remember to set it back to `all` as soon as possible. + + +`cluster.routing.allocation.cluster_concurrent_rebalance` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Defines the number of concurrent shard rebalances are allowed across the whole cluster. Defaults to `2`. Note that this setting only controls the number of concurrent shard relocations due to imbalances in the cluster. This setting does not limit shard relocations due to [allocation filtering](#cluster-shard-allocation-filtering) or [forced awareness](docs-content://deploy-manage/distributed-architecture/shard-allocation-relocation-recovery/shard-allocation-awareness.md#forced-awareness). Increasing this setting may cause the cluster to use additional resources moving shards between nodes, so we generally do not recommend adjusting this setting from its default of `2`. + +`cluster.routing.allocation.type` +: Selects the algorithm used for computing the cluster balance. Defaults to `desired_balance` which selects the *desired balance allocator*. This allocator runs a background task which computes the desired balance of shards in the cluster. Once this background task completes, {{es}} moves shards to their desired locations. + +[8.8] May also be set to `balanced` to select the legacy *balanced allocator*. This allocator was the default allocator in versions of {{es}} before 8.6.0. It runs in the foreground, preventing the master from doing other work in parallel. It works by selecting a small number of shard movements which immediately improve the balance of the cluster, and when those shard movements complete it runs again and selects another few shards to move. Since this allocator makes its decisions based only on the current state of the cluster, it will sometimes move a shard several times while balancing the cluster. + + + +## Shard balancing heuristics settings [shards-rebalancing-heuristics] + +Rebalancing works by computing a *weight* for each node based on its allocation of shards, and then moving shards between nodes to reduce the weight of the heavier nodes and increase the weight of the lighter ones. The cluster is balanced when there is no possible shard movement that can bring the weight of any node closer to the weight of any other node by more than a configurable threshold. 
+ +The weight of a node depends on the number of shards it holds and on the total estimated resource usage of those shards expressed in terms of the size of the shard on disk and the number of threads needed to support write traffic to the shard. {{es}} estimates the resource usage of shards belonging to data streams when they are created by a rollover. The estimated disk size of the new shard is the mean size of the other shards in the data stream. The estimated write load of the new shard is a weighted average of the actual write loads of recent shards in the data stream. Shards that do not belong to the write index of a data stream have an estimated write load of zero. + +The following settings control how {{es}} combines these values into an overall measure of each node’s weight. + +`cluster.routing.allocation.balance.threshold` +: (float, [Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The minimum improvement in weight which triggers a rebalancing shard movement. Defaults to `1.0f`. Raising this value will cause {{es}} to stop rebalancing shards sooner, leaving the cluster in a more unbalanced state. + +`cluster.routing.allocation.balance.shard` +: (float, [Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Defines the weight factor for the total number of shards allocated to each node. Defaults to `0.45f`. Raising this value increases the tendency of {{es}} to equalize the total number of shards across nodes ahead of the other balancing variables. + +`cluster.routing.allocation.balance.index` +: (float, [Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Defines the weight factor for the number of shards per index allocated to each node. Defaults to `0.55f`. Raising this value increases the tendency of {{es}} to equalize the number of shards of each index across nodes ahead of the other balancing variables. + +`cluster.routing.allocation.balance.disk_usage` +: (float, [Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Defines the weight factor for balancing shards according to their predicted disk size in bytes. Defaults to `2e-11f`. Raising this value increases the tendency of {{es}} to equalize the total disk usage across nodes ahead of the other balancing variables. + +`cluster.routing.allocation.balance.write_load` +: (float, [Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Defines the weight factor for the write load of each shard, in terms of the estimated number of indexing threads needed by the shard. Defaults to `10.0f`. Raising this value increases the tendency of {{es}} to equalize the total write load across nodes ahead of the other balancing variables. + +::::{note} +* If you have a large cluster, it may be unnecessary to keep it in a perfectly balanced state at all times. It is less resource-intensive for the cluster to operate in a somewhat unbalanced state rather than to perform all the shard movements needed to achieve the perfect balance. If so, increase the value of `cluster.routing.allocation.balance.threshold` to define the acceptable imbalance between nodes. For instance, if you have an average of 500 shards per node and can accept a difference of 5% (25 typical shards) between nodes, set `cluster.routing.allocation.balance.threshold` to `25`. 
+* We do not recommend adjusting the values of the heuristic weight factor settings. The default values work well in all reasonable clusters. Although different values may improve the current balance in some ways, it is possible that they will create unexpected problems in the future or prevent it from gracefully handling an unexpected disruption. +* Regardless of the result of the balancing algorithm, rebalancing might not be allowed due to allocation rules such as forced awareness and allocation filtering. Use the [Cluster allocation explain](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-allocation-explain) API to explain the current allocation of shards. + +:::: + + + +## Disk-based shard allocation settings [disk-based-shard-allocation] + +$$$disk-based-shard-allocation-description$$$ +The disk-based shard allocator ensures that all nodes have enough disk space without performing more shard movements than necessary. It allocates shards based on a pair of thresholds known as the *low watermark* and the *high watermark*. Its primary goal is to ensure that no node exceeds the high watermark, or at least that any such overage is only temporary. If a node exceeds the high watermark then {{es}} will solve this by moving some of its shards onto other nodes in the cluster. + +::::{note} +It is normal for nodes to temporarily exceed the high watermark from time to time. +:::: + + +The allocator also tries to keep nodes clear of the high watermark by forbidding the allocation of more shards to a node that exceeds the low watermark. Importantly, if all of your nodes have exceeded the low watermark then no new shards can be allocated and {{es}} will not be able to move any shards between nodes in order to keep the disk usage below the high watermark. You must ensure that your cluster has enough disk space in total and that there are always some nodes below the low watermark. + +Shard movements triggered by the disk-based shard allocator must also satisfy all other shard allocation rules such as [allocation filtering](#cluster-shard-allocation-filtering) and [forced awareness](docs-content://deploy-manage/distributed-architecture/shard-allocation-relocation-recovery/shard-allocation-awareness.md#forced-awareness). If these rules are too strict then they can also prevent the shard movements needed to keep the nodes' disk usage under control. If you are using [data tiers](docs-content://manage-data/lifecycle/data-tiers.md) then {{es}} automatically configures allocation filtering rules to place shards within the appropriate tier, which means that the disk-based shard allocator works independently within each tier. + +If a node is filling up its disk faster than {{es}} can move shards elsewhere then there is a risk that the disk will completely fill up. To prevent this, as a last resort, once the disk usage reaches the *flood-stage* watermark {{es}} will block writes to indices with a shard on the affected node. It will also continue to move shards onto the other nodes in the cluster. When disk usage on the affected node drops below the high watermark, {{es}} automatically removes the write block. Refer to [Fix watermark errors](docs-content://troubleshoot/elasticsearch/fix-watermark-errors.md) to resolve persistent watermark errors. + +::::{admonition} Max headroom settings +:class: note + +Max headroom settings apply only when watermark settings are percentages or ratios. 
+ +A max headroom value is intended to cap the required free disk space before hitting the respective watermark. This is useful for servers with larger disks, where a percentage or ratio watermark could translate to an overly large free disk space requirement. In this case, the max headroom can be used to cap the required free disk space amount. + +For example, where `cluster.routing.allocation.disk.watermark.flood_stage` is 95% and `cluster.routing.allocation.disk.watermark.flood_stage.max_headroom` is 100GB, this means that: + +* For a smaller disk, e.g., of 100GB, the flood watermark will hit at 95%, meaning at 5GB of free space, since 5GB is smaller than the 100GB max headroom value. +* For a larger disk, e.g., of 100TB, the flood watermark will hit at 100GB of free space. That is because the 95% flood watermark alone would require 5TB of free disk space, but is capped by the max headroom setting to 100GB. + +Max headroom settings have their default values only if their respective watermark settings are not explicitly set. If watermarks are explicitly set, then the max headroom settings do not have their default values, and need to be explicitly set if they are needed. + +:::: + + +::::{tip} +:name: disk-based-shard-allocation-does-not-balance + +It is normal for the nodes in your cluster to be using very different amounts of disk space. The [balance](#shards-rebalancing-settings) of the cluster depends on a combination of factors which includes the number of shards on each node, the indices to which those shards belong, and the resource needs of each shard in terms of its size on disk and its CPU usage. {{es}} must trade off all of these factors against each other, and a cluster which is balanced when looking at the combination of all of these factors may not appear to be balanced if you focus attention on just one of them. + +:::: + + +You can use the following settings to control disk-based allocation: + +$$$cluster-routing-disk-threshold$$$ + +`cluster.routing.allocation.disk.threshold_enabled` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Defaults to `true`. Set to `false` to disable the disk allocation decider. Upon disabling, it will also remove any existing `index.blocks.read_only_allow_delete` index blocks. + +$$$cluster-routing-watermark-low$$$ + +`cluster.routing.allocation.disk.watermark.low` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Controls the low watermark for disk usage. It defaults to `85%`, meaning that {{es}} will not allocate shards to nodes that have more than 85% disk used. It can alternatively be set to a ratio value, e.g., `0.85`. It can also be set to an absolute byte value (like `500mb`) to prevent {{es}} from allocating shards if less than the specified amount of space is available. This setting has no effect on the primary shards of newly-created indices but will prevent their replicas from being allocated. + +`cluster.routing.allocation.disk.watermark.low.max_headroom` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Controls the max headroom for the low watermark (in case of a percentage/ratio value). Defaults to 200GB when `cluster.routing.allocation.disk.watermark.low` is not explicitly set. This caps the amount of free space required. 
+ +$$$cluster-routing-watermark-high$$$ + +`cluster.routing.allocation.disk.watermark.high` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Controls the high watermark. It defaults to `90%`, meaning that {{es}} will attempt to relocate shards away from a node whose disk usage is above 90%. It can alternatively be set to a ratio value, e.g., `0.9`. It can also be set to an absolute byte value (similarly to the low watermark) to relocate shards away from a node if it has less than the specified amount of free space. This setting affects the allocation of all shards, whether previously allocated or not. + +`cluster.routing.allocation.disk.watermark.high.max_headroom` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Controls the max headroom for the high watermark (in case of a percentage/ratio value). Defaults to 150GB when `cluster.routing.allocation.disk.watermark.high` is not explicitly set. This caps the amount of free space required. + +`cluster.routing.allocation.disk.watermark.enable_for_single_data_node` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) In earlier releases, the default behaviour was to disregard disk watermarks for a single data node cluster when making an allocation decision. This is deprecated behavior since 7.14 and has been removed in 8.0. The only valid value for this setting is now `true`. The setting will be removed in a future release. + +$$$cluster-routing-flood-stage$$$ + +`cluster.routing.allocation.disk.watermark.flood_stage` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Controls the flood stage watermark, which defaults to 95%. {{es}} enforces a read-only index block ([`index.blocks.read_only_allow_delete`](/reference/elasticsearch/index-settings/index-block.md)) on every index that has one or more shards allocated on the node, and that has at least one disk exceeding the flood stage. This setting is a last resort to prevent nodes from running out of disk space. The index block is automatically released when the disk utilization falls below the high watermark. Similarly to the low and high watermark values, it can alternatively be set to a ratio value, e.g., `0.95`, or an absolute byte value. + + +`cluster.routing.allocation.disk.watermark.flood_stage.max_headroom` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Controls the max headroom for the flood stage watermark (in case of a percentage/ratio value). Defaults to 100GB when `cluster.routing.allocation.disk.watermark.flood_stage` is not explicitly set. This caps the amount of free space required. + +::::{note} +You can’t mix the usage of percentage/ratio values and byte values across the `cluster.routing.allocation.disk.watermark.low`, `cluster.routing.allocation.disk.watermark.high`, and `cluster.routing.allocation.disk.watermark.flood_stage` settings. Either all values must be set to percentage/ratio values, or all must be set to byte values. 
This is required so that {{es}} can validate that the settings are internally consistent, ensuring that the low disk threshold is less than the high disk threshold, and the high disk threshold is less than the flood stage threshold. A similar comparison check is done for the max headroom values. +:::: + + +$$$cluster-routing-flood-stage-frozen$$$ + +`cluster.routing.allocation.disk.watermark.flood_stage.frozen` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Controls the flood stage watermark for dedicated frozen nodes, which defaults to 95%. + +`cluster.routing.allocation.disk.watermark.flood_stage.frozen.max_headroom` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Controls the max headroom for the flood stage watermark (in case of a percentage/ratio value) for dedicated frozen nodes. Defaults to 20GB when `cluster.routing.allocation.disk.watermark.flood_stage.frozen` is not explicitly set. This caps the amount of free space required on dedicated frozen nodes. + +`cluster.info.update.interval` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) How often {{es}} should check on disk usage for each node in the cluster. Defaults to `30s`. + +::::{note} +Percentage values refer to used disk space, while byte values refer to free disk space. This can be confusing, because it flips the meaning of high and low. For example, it makes sense to set the low watermark to 10gb and the high watermark to 5gb, but not the other way around. +:::: + + + +## Shard allocation awareness settings [shard-allocation-awareness-settings] + +You can use [custom node attributes](/reference/elasticsearch/configuration-reference/node-settings.md#custom-node-attributes) as *awareness attributes* to enable {{es}} to take your physical hardware configuration into account when allocating shards. If {{es}} knows which nodes are on the same physical server, in the same rack, or in the same zone, it can distribute the primary shard and its replica shards to minimize the risk of losing all shard copies in the event of a failure. [Learn more about shard allocation awareness](docs-content://deploy-manage/distributed-architecture/shard-allocation-relocation-recovery/shard-allocation-awareness.md). + +`cluster.routing.allocation.awareness.attributes` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The node attributes that {{es}} should use as awareness attributes. For example, if you have a `rack_id` attribute that specifies the rack in which each node resides, you can set this setting to `rack_id` to ensure that primary and replica shards are not allocated on the same rack. You can specify multiple attributes as a comma-separated list. + +`cluster.routing.allocation.awareness.force.*` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The shard allocation awareness values that must exist for shards to be reallocated in case of location failure. 
Learn more about [forced awareness](docs-content://deploy-manage/distributed-architecture/shard-allocation-relocation-recovery/shard-allocation-awareness.md#forced-awareness). + + +## Cluster-level shard allocation filtering [cluster-shard-allocation-filtering] + +You can use cluster-level shard allocation filters to control where {{es}} allocates shards from any index. These cluster wide filters are applied in conjunction with [per-index allocation filtering](/reference/elasticsearch/index-settings/shard-allocation.md) and [allocation awareness](docs-content://deploy-manage/distributed-architecture/shard-allocation-relocation-recovery/shard-allocation-awareness.md). + +Shard allocation filters can be based on [custom node attributes](/reference/elasticsearch/configuration-reference/node-settings.md#custom-node-attributes) or the built-in `_name`, `_host_ip`, `_publish_ip`, `_ip`, `_host`, `_id` and `_tier` attributes. + +The `cluster.routing.allocation` settings are [dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), enabling live indices to be moved from one set of nodes to another. Shards are only relocated if it is possible to do so without breaking another routing constraint, such as never allocating a primary and replica shard on the same node. + +The most common use case for cluster-level shard allocation filtering is when you want to decommission a node. To move shards off of a node prior to shutting it down, you could create a filter that excludes the node by its IP address: + +```console +PUT _cluster/settings +{ + "persistent" : { + "cluster.routing.allocation.exclude._ip" : "10.0.0.1" + } +} +``` + +### Cluster routing settings [cluster-routing-settings] + +`cluster.routing.allocation.include.{{attribute}}` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Allocate shards to a node whose `{{attribute}}` has at least one of the comma-separated values. + +`cluster.routing.allocation.require.{{attribute}}` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Only allocate shards to a node whose `{{attribute}}` has *all* of the comma-separated values. + +`cluster.routing.allocation.exclude.{{attribute}}` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Do not allocate shards to a node whose `{{attribute}}` has *any* of the comma-separated values. + +The cluster allocation settings support the following built-in attributes: + +`_name` +: Match nodes by node name + +`_host_ip` +: Match nodes by host IP address (IP associated with hostname) + +`_publish_ip` +: Match nodes by publish IP address + +`_ip` +: Match either `_host_ip` or `_publish_ip` + +`_host` +: Match nodes by hostname + +`_id` +: Match nodes by node id + +`_tier` +: Match nodes by the node’s [data tier](docs-content://manage-data/lifecycle/data-tiers.md) role + +::::{note} +`_tier` filtering is based on [node](/reference/elasticsearch/configuration-reference/node-settings.md) roles. Only a subset of roles are [data tier](docs-content://manage-data/lifecycle/data-tiers.md) roles, and the generic [data role](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#data-node-role) will match any tier filtering.
+:::: + + +You can use wildcards when specifying attribute values, for example: + +```console +PUT _cluster/settings +{ + "persistent": { + "cluster.routing.allocation.exclude._ip": "192.168.2.*" + } +} +``` + + + diff --git a/docs/reference/elasticsearch/configuration-reference/cross-cluster-replication-settings.md b/docs/reference/elasticsearch/configuration-reference/cross-cluster-replication-settings.md new file mode 100644 index 0000000000000..c32cd1842e435 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/cross-cluster-replication-settings.md @@ -0,0 +1,34 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/ccr-settings.html +--- + +# Cross-cluster replication settings [ccr-settings] + +These {{ccr}} settings can be dynamically updated on a live cluster with the [cluster update settings API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings). + + +## Remote recovery settings [ccr-recovery-settings] + +The following setting can be used to rate-limit the data transmitted during [remote recoveries](docs-content://deploy-manage/tools/cross-cluster-replication.md#ccr-remote-recovery): + +`ccr.indices.recovery.max_bytes_per_sec` ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) +: Limits the total inbound and outbound remote recovery traffic on each node. Since this limit applies on each node, but there may be many nodes performing remote recoveries concurrently, the total amount of remote recovery bytes may be much higher than this limit. If you set this limit too high then there is a risk that ongoing remote recoveries will consume an excess of bandwidth (or other resources) which could destabilize the cluster. This setting is used by both the leader and follower clusters. For example, if it is set to `20mb` on a leader, the leader will only send `20mb/s` to the follower even if the follower is requesting and can accept `60mb/s`. Defaults to `40mb`. + + +## Advanced remote recovery settings [ccr-advanced-recovery-settings] + +The following *expert* settings can be set to manage the resources consumed by remote recoveries: + +`ccr.indices.recovery.max_concurrent_file_chunks` ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) +: Controls the number of file chunk requests that can be sent in parallel per recovery. As multiple remote recoveries might already be running in parallel, increasing this expert-level setting might only help in situations where remote recovery of a single shard is not reaching the total inbound and outbound remote recovery traffic as configured by `ccr.indices.recovery.max_bytes_per_sec`. Defaults to `5`. The maximum allowed value is `10`. + +`ccr.indices.recovery.chunk_size` ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) +: Controls the chunk size requested by the follower during file transfer. Defaults to `1mb`. + +`ccr.indices.recovery.recovery_activity_timeout` ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) +: Controls the timeout for recovery activity. 
This timeout primarily applies on the leader cluster. The leader cluster must open resources in-memory to supply data to the follower during the recovery process. If the leader does not receive recovery requests from the follower for this period of time, it will close the resources. Defaults to 60 seconds. + +`ccr.indices.recovery.internal_action_timeout` ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) +: Controls the timeout for individual network requests during the remote recovery process. An individual action timing out can fail the recovery. Defaults to 60 seconds. + diff --git a/docs/reference/elasticsearch/configuration-reference/data-stream-lifecycle-settings.md b/docs/reference/elasticsearch/configuration-reference/data-stream-lifecycle-settings.md new file mode 100644 index 0000000000000..b5089bcd41c38 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/data-stream-lifecycle-settings.md @@ -0,0 +1,68 @@ +--- +navigation_title: "Data stream lifecycle settings" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/data-stream-lifecycle-settings.html +--- + +# Data stream lifecycle settings in {{es}} [data-stream-lifecycle-settings] + + +These are the settings available for configuring [data stream lifecycle](docs-content://manage-data/lifecycle/data-stream.md). + +## Cluster level settings [_cluster_level_settings] + +$$$data-streams-lifecycle-retention-max$$$ + +`data_streams.lifecycle.retention.max` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), [time unit value](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) The maximum retention period that will apply to all user data streams managed by the data stream lifecycle. The max retention will also override the retention of a data stream whose configured retention exceeds the max retention. It should be greater than `10s`. + +$$$data-streams-lifecycle-retention-default$$$ + +`data_streams.lifecycle.retention.default` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), [time unit value](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) The retention period that will apply to all user data streams managed by the data stream lifecycle that do not have retention configured. It should be greater than `10s` and less than or equal to [`data_streams.lifecycle.retention.max`](#data-streams-lifecycle-retention-max). + +$$$data-streams-lifecycle-poll-interval$$$ + +`data_streams.lifecycle.poll_interval` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), [time unit value](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) How often {{es}} checks what the next action is for all data streams with a built-in lifecycle. Defaults to `5m`. + +$$$cluster-lifecycle-default-rollover$$$ + +`cluster.lifecycle.default.rollover` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), string) This property accepts a key-value pair formatted string and configures the conditions that would trigger a data stream to [rollover](docs-content://manage-data/lifecycle/index-lifecycle-management/rollover.md) when it has `lifecycle` configured. This property is an implementation detail and subject to change. 
Currently, it defaults to `max_age=auto,max_primary_shard_size=50gb,min_docs=1,max_primary_shard_docs=200000000`. This means that your data stream will rollover if any of the following conditions are met: + + * Either any primary shard reaches the size of 50GB, + * or any primary shard contains 200,000,000 documents, + * or the index reaches a certain age which depends on the retention time of your data stream, + * **and** has at least one document. + + +$$$data-streams-lifecycle-target-merge-factor$$$ + +`data_streams.lifecycle.target.merge.policy.merge_factor` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), integer) Data stream lifecycle implements [tail merging](docs-content://manage-data/lifecycle/data-stream.md#data-streams-lifecycle-how-it-works) by updating the Lucene merge policy factor for the target backing index. The merge factor is both the number of segments that should be merged together, and the maximum number of segments that we expect to find on a given tier. This setting controls the value that [Data stream lifecycle](docs-content://manage-data/lifecycle/data-stream.md) configures on the target index. It defaults to `16`. The value will be visible under the `index.merge.policy.merge_factor` index setting on the target index. + +$$$data-streams-lifecycle-target-floor-segment$$$ + +`data_streams.lifecycle.target.merge.policy.floor_segment` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Data stream lifecycle implements [tail merging](docs-content://manage-data/lifecycle/data-stream.md#data-streams-lifecycle-how-it-works) by updating the Lucene merge policy floor segment for the target backing index. This floor segment size is a way to prevent indices from having a long tail of very small segments. This setting controls the value that [data stream lifecycle](docs-content://manage-data/lifecycle/data-stream.md) configures on the target index. It defaults to `100MB`. + +$$$data-streams-lifecycle-signalling-error-retry-interval$$$ + +`data_streams.lifecycle.signalling.error_retry_interval` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), integer) Represents the number of retries data stream lifecycle has to perform for an index in an error step in order to signal that the index is not progressing (i.e. it’s stuck in an error step). The current signalling mechanism is a log statement at the `error` level; however, the signalling mechanism can be extended in the future. Defaults to 10 retries. + + +## Index level settings [_index_level_settings] + +The following index-level settings are typically configured on the backing indices of a data stream. + +$$$index-lifecycle-prefer-ilm$$$ + +`index.lifecycle.prefer_ilm` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-settings), boolean) This setting determines which feature is managing the backing index of a data stream if, and only if, the backing index has an [{{ilm}}](docs-content://manage-data/lifecycle/index-lifecycle-management.md) ({{ilm-init}}) policy and the data stream also has a built-in lifecycle. When `true`, this index is managed by {{ilm-init}}; when `false`, the backing index is managed by the data stream lifecycle. Defaults to `true`. 
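+ +For example, a minimal sketch of handing a single backing index over to the data stream lifecycle (the backing index name `.ds-my-data-stream-2025.01.01-000001` is purely illustrative): + +```console +PUT .ds-my-data-stream-2025.01.01-000001/_settings +{ + "index.lifecycle.prefer_ilm": false +} +```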
+ +$$$index-data-stream-lifecycle-origination-date$$$ + +`index.lifecycle.origination_date` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-settings), long) If specified, this is the timestamp used to calculate the backing index generation age after this backing index has been [rolled over](docs-content://manage-data/lifecycle/index-lifecycle-management/rollover.md). The generation age is used to determine data retention, consequently, you can use this setting if you create a backing index that contains older data and want to ensure that the retention period or other parts of the lifecycle will be applied based on the data’s original timestamp and not the timestamp they got indexed. Specified as a Unix epoch value in milliseconds. diff --git a/docs/reference/elasticsearch/configuration-reference/discovery-cluster-formation-settings.md b/docs/reference/elasticsearch/configuration-reference/discovery-cluster-formation-settings.md new file mode 100644 index 0000000000000..f2670875e0510 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/discovery-cluster-formation-settings.md @@ -0,0 +1,142 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-discovery-settings.html +--- + +# Discovery and cluster formation settings [modules-discovery-settings] + +[Discovery and cluster formation](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation.md) are affected by the following settings: + +`discovery.seed_hosts` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Provides a list of the addresses of the master-eligible nodes in the cluster. May also be a single string containing the addresses separated by commas. Each address has the format `host:port` or `host`. The `host` is either a host name to be resolved by DNS, an IPv4 address, or an IPv6 address. IPv6 addresses must be enclosed in square brackets. If a host name resolves via DNS to multiple addresses, {{es}} uses all of them. DNS lookups are subject to [JVM DNS caching](docs-content://deploy-manage/deploy/self-managed/networkaddress-cache-ttl.md). If the `port` is not given then it is determined by checking the following settings in order: + +1. `transport.profiles.default.port` +2. `transport.port` + +If neither of these is set then the default port is `9300`. The default value for `discovery.seed_hosts` is `["127.0.0.1", "[::1]"]`. See [`discovery.seed_hosts`](docs-content://deploy-manage/deploy/self-managed/important-settings-configuration.md#unicast.hosts). + + +`discovery.seed_providers` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies which types of [seed hosts provider](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/discovery-hosts-providers.md#built-in-hosts-providers) to use to obtain the addresses of the seed nodes used to start the discovery process. By default, it is the [settings-based seed hosts provider](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/discovery-hosts-providers.md#settings-based-hosts-provider) which obtains the seed node addresses from the `discovery.seed_hosts` setting. + +`discovery.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies whether {{es}} should form a multiple-node cluster. 
Defaults to `multi-node`, which means that {{es}} discovers other nodes when forming a cluster and allows other nodes to join the cluster later. If set to `single-node`, {{es}} forms a single-node cluster and suppresses the timeout set by `cluster.publish.timeout`. For more information about when you might use this setting, see [Single-node discovery](docs-content://deploy-manage/deploy/self-managed/bootstrap-checks.md#single-node-discovery). + +`cluster.initial_master_nodes` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets the initial set of master-eligible nodes in a brand-new cluster. By default this list is empty, meaning that this node expects to join a cluster that has already been bootstrapped. Remove this setting once the cluster has formed, and never set it again for this cluster. Do not configure this setting on master-ineligible nodes. Do not configure this setting on nodes joining an existing cluster. Do not configure this setting on nodes which are restarting. Do not configure this setting when performing a full-cluster restart. See [`cluster.initial_master_nodes`](docs-content://deploy-manage/deploy/self-managed/important-settings-configuration.md#initial_master_nodes). + + +## Expert settings [_expert_settings] + +Discovery and cluster formation are also affected by the following *expert-level* settings, although it is not recommended to change any of these from their default values. + +::::{warning} +If you adjust these settings then your cluster may not form correctly or may become unstable or intolerant of certain failures. +:::: + + +`discovery.cluster_formation_warning_timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long a node will try to form a cluster before logging a warning that the cluster did not form. Defaults to `10s`. If a cluster has not formed after `discovery.cluster_formation_warning_timeout` has elapsed then the node will log a warning message that starts with the phrase `master not discovered` which describes the current state of the discovery process. + +`discovery.find_peers_interval` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long a node will wait before attempting another discovery round. Defaults to `1s`. + +`discovery.probe.connect_timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long to wait when attempting to connect to each address. Defaults to `30s`. + +`discovery.probe.handshake_timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long to wait when attempting to identify the remote node via a handshake. Defaults to `30s`. + +`discovery.request_peers_timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long a node will wait after asking its peers again before considering the request to have failed. Defaults to `3s`. + +`discovery.find_peers_warning_timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long a node will attempt to discover its peers before it starts to log verbose messages describing why the connection attempts are failing. Defaults to `3m`. 
+ +`discovery.seed_resolver.max_concurrent_resolvers` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies how many concurrent DNS lookups to perform when resolving the addresses of seed nodes. Defaults to `10`. + +`discovery.seed_resolver.timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies how long to wait for each DNS lookup performed when resolving the addresses of seed nodes. Defaults to `5s`. + +`cluster.auto_shrink_voting_configuration` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Controls whether the [voting configuration](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/modules-discovery-voting.md) sheds departed nodes automatically, as long as it still contains at least 3 nodes. The default value is `true`. If set to `false`, the voting configuration never shrinks automatically and you must remove departed nodes manually with the [voting configuration exclusions API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-post-voting-config-exclusions). + +$$$master-election-settings$$$`cluster.election.back_off_time` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets the amount to increase the upper bound on the wait before an election on each election failure. Note that this is *linear* backoff. This defaults to `100ms`. Changing this setting from the default may cause your cluster to fail to elect a master node. + +`cluster.election.duration` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long each election is allowed to take before a node considers it to have failed and schedules a retry. This defaults to `500ms`. Changing this setting from the default may cause your cluster to fail to elect a master node. + +`cluster.election.initial_timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets the upper bound on how long a node will wait initially, or after the elected master fails, before attempting its first election. This defaults to `100ms`. Changing this setting from the default may cause your cluster to fail to elect a master node. + +`cluster.election.max_timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets the maximum upper bound on how long a node will wait before attempting a first election, so that a network partition that lasts for a long time does not result in excessively sparse elections. This defaults to `10s`. Changing this setting from the default may cause your cluster to fail to elect a master node. + +$$$fault-detection-settings$$$`cluster.fault_detection.follower_check.interval` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long the elected master waits between follower checks to each other node in the cluster. Defaults to `1s`. Changing this setting from the default may cause your cluster to become unstable. 
+ +`cluster.fault_detection.follower_check.timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long the elected master waits for a response to a follower check before considering it to have failed. Defaults to `10s`. Changing this setting from the default may cause your cluster to become unstable. + +`cluster.fault_detection.follower_check.retry_count` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how many consecutive follower check failures must occur to each node before the elected master considers that node to be faulty and removes it from the cluster. Defaults to `3`. Changing this setting from the default may cause your cluster to become unstable. + +`cluster.fault_detection.leader_check.interval` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long each node waits between checks of the elected master. Defaults to `1s`. Changing this setting from the default may cause your cluster to become unstable. + +`cluster.fault_detection.leader_check.timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long each node waits for a response to a leader check from the elected master before considering it to have failed. Defaults to `10s`. Changing this setting from the default may cause your cluster to become unstable. + +`cluster.fault_detection.leader_check.retry_count` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how many consecutive leader check failures must occur before a node considers the elected master to be faulty and attempts to find or elect a new master. Defaults to `3`. Changing this setting from the default may cause your cluster to become unstable. + +`cluster.follower_lag.timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long the master node waits to receive acknowledgements for cluster state updates from lagging nodes. The default value is `90s`. If a node does not successfully apply the cluster state update within this period of time, it is considered to have failed and is removed from the cluster. See [Publishing the cluster state](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/cluster-state-overview.md#cluster-state-publishing). + +`cluster.max_voting_config_exclusions` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Sets a limit on the number of voting configuration exclusions at any one time. The default value is `10`. See [*Add and remove nodes in your cluster*](docs-content://deploy-manage/maintenance/add-and-remove-elasticsearch-nodes.md). + +`cluster.publish.info_timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long the master node waits for each cluster state update to be completely published to all nodes before logging a message indicating that some nodes are responding slowly. The default value is `10s`. 
+ +`cluster.publish.timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets how long the master node waits for each cluster state update to be completely published to all nodes, unless `discovery.type` is set to `single-node`. The default value is `30s`. See [Publishing the cluster state](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/cluster-state-overview.md#cluster-state-publishing). + +`cluster.discovery_configuration_check.interval` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets the interval of some checks that will log warnings about an incorrect discovery configuration. The default value is `30s`. + +`cluster.join_validation.cache_timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) When a node requests to join the cluster, the elected master node sends it a copy of a recent cluster state to detect certain problems which might prevent the new node from joining the cluster. The master caches the state it sends and uses the cached state if another node joins the cluster soon after. This setting controls how long the master waits until it clears this cache. Defaults to `60s`. + +$$$no-master-block$$$ + +`cluster.no_master_block` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Specifies which operations are rejected when there is no active master in a cluster. This setting has three valid values: + + `all` + : All operations on the node (both read and write operations) are rejected. This also applies for API cluster state read or write operations, like the get index settings, update mapping, and cluster state API. + + `write` + : (default) Write operations are rejected. Read operations succeed, based on the last known cluster configuration. This situation may result in partial reads of stale data as this node may be isolated from the rest of the cluster. + + `metadata_write` + : Only metadata write operations (e.g. mapping updates, routing table changes) are rejected, but regular indexing operations continue to work. Read and write operations succeed, based on the last known cluster configuration. This situation may result in partial reads of stale data as this node may be isolated from the rest of the cluster. + + ::::{note} + * The `cluster.no_master_block` setting doesn’t apply to nodes-based APIs (for example, cluster stats, node info, and node stats APIs). Requests to these APIs are not blocked and can run on any available node. + * For the cluster to be fully operational, it must have an active master. + + :::: + + +`monitor.fs.health.enabled` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) If `true`, the node runs periodic [filesystem health checks](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/cluster-fault-detection.md#cluster-fault-detection-filesystem-health). Defaults to `true`. + +`monitor.fs.health.refresh_interval` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Interval between successive [filesystem health checks](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/cluster-fault-detection.md#cluster-fault-detection-filesystem-health). Defaults to `2m`. 
+ +`monitor.fs.health.slow_path_logging_threshold` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) If a [filesystem health checks](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/cluster-fault-detection.md#cluster-fault-detection-filesystem-health) takes longer than this threshold then {{es}} logs a warning. Defaults to `5s`. + diff --git a/docs/reference/elasticsearch/configuration-reference/ece-elasticsearch-settings.md b/docs/reference/elasticsearch/configuration-reference/ece-elasticsearch-settings.md new file mode 100644 index 0000000000000..8942989b0397b --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/ece-elasticsearch-settings.md @@ -0,0 +1,39 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/cloud-enterprise/current/ece-add-user-settings.html#ece-change-user-settings-examples +--- + +# ECE Elasticsearch settings [ece-add-user-settings] + +Change how Elasticsearch runs by providing your own user settings. User settings are appended to the `elasticsearch.yml` configuration file for your cluster and provide custom configuration options. Elastic Cloud Enterprise supports many of the user settings for the version of Elasticsearch that your cluster is running. + +::::{tip} +Some settings that could break your cluster if set incorrectly are blocked, such as certain zen discovery and security settings. For examples of a few of the settings that are generally safe in cloud environments, check [Edit stack settings](docs-content://deploy-manage/deploy/cloud-enterprise/edit-stack-settings.md) for {{ece}} and [Edit stack settings](docs-content://deploy-manage/deploy/elastic-cloud/edit-stack-settings.md) for the {{ecloud}} hosted offering. +:::: + + +To add user settings: + +1. [Log into the Cloud UI](docs-content://deploy-manage/deploy/cloud-enterprise/log-into-cloud-ui.md). +2. On the **Deployments** page, select your deployment. + + Narrow the list by name, ID, or choose from several other filters. To further define the list, use a combination of filters. + +3. From your deployment menu, go to the **Edit** page. +4. In the **Elasticsearch** section, select **Edit elasticsearch.yml**. For deployments with existing user settings, you may have to expand the **User setting overrides** caret for each node type instead. +5. Update the user settings. +6. Select **Save changes**. + + ::::{warning} + If you encounter the **Edit elasticsearch.yml** carets, be sure to make your changes on all Elasticsearch node types. + :::: + + + +## Enable email notifications from Gmail [ece_enable_email_notifications_from_gmail] + +You can configure email notifications to Gmail for a user that you specify. For details, refer to [Configuring email actions](docs-content://explore-analyze/alerts-cases/watcher/actions-email.md). + +::::{warning} +Before you add the `xpack.notification.email*` setting in Elasticsearch user settings, make sure you add the account SMTP password to the keystore as a [secret value](docs-content://deploy-manage/security/secure-settings.md). 
+:::: diff --git a/docs/reference/elasticsearch/configuration-reference/elastic-cloud-hosted-elasticsearch-settings.md b/docs/reference/elasticsearch/configuration-reference/elastic-cloud-hosted-elasticsearch-settings.md new file mode 100644 index 0000000000000..4e6298f93344f --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/elastic-cloud-hosted-elasticsearch-settings.md @@ -0,0 +1,296 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/cloud/current/ec-add-user-settings.html#ec-es-elasticsearch-settings +--- + +# Elastic Cloud Hosted Elasticsearch settings [ec-add-user-settings] + +Change how {{es}} runs by providing your own user settings. Elasticsearch Service appends these settings to each node’s `elasticsearch.yml` configuration file. + +Elasticsearch Service automatically rejects `elasticsearch.yml` settings that could break your cluster. For a list of supported settings, check [Supported {{es}} settings](#ec-es-elasticsearch-settings). + +::::{warning} +You can also update [dynamic cluster settings](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting) using {{es}}'s [update cluster settings API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings). However, Elasticsearch Service doesn’t reject unsafe setting changes made using this API. Use with caution. +:::: + + +To add or edit user settings: + +1. Log in to the [Elasticsearch Service Console](https://cloud.elastic.co?page=docs&placement=docs-body). +2. Find your deployment on the home page in the Elasticsearch Service card and select **Manage** to access it directly. Or, select **Hosted deployments** to go to the deployments page to view all of your deployments. + + On the deployments page you can narrow your deployments by name, ID, or choose from several other filters. To customize your view, use a combination of filters, or change the format from a grid to a list. + +3. From your deployment menu, go to the **Edit** page. +4. In the **Elasticsearch** section, select **Manage user settings and extensions**. +5. Update the user settings. +6. Select **Save changes**. + +::::{note} +In some cases, you may get a warning saying "User settings are different across Elasticsearch instances". To fix this issue, ensure that your user settings (including the comments sections and whitespaces) are identical across all Elasticsearch nodes (not only the data tiers, but also the Master, Machine Learning, and Coordinating nodes). +:::: + + +## Supported {{es}} settings [ec-es-elasticsearch-settings] + +Elasticsearch Service supports the following `elasticsearch.yml` settings. + +### General settings [ec_general_settings] + +The following general settings are supported: + +$$$http-cors-settings$$$`http.cors.*` +: Enables cross-origin resource sharing (CORS) settings for the [HTTP module](/reference/elasticsearch/configuration-reference/networking-settings.md). + + ::::{note} + If your use case depends on the ability to receive CORS requests and you have a cluster that was provisioned prior to January 25th 2019, you must manually set `http.cors.enabled` to `true` and allow a specific set of hosts with `http.cors.allow-origin`. Applying these changes in your Elasticsearch configuration allows cross-origin resource sharing requests. + :::: + + +`http.compression` +: Support for [HTTP compression](/reference/elasticsearch/configuration-reference/networking-settings.md) when possible (with Accept-Encoding). Defaults to `true`. 
+ +`transport.compress` +: Configures [transport compression](/reference/elasticsearch/configuration-reference/networking-settings.md) for node-to-node traffic. + +`transport.compression_scheme` +: Configures [transport compression](/reference/elasticsearch/configuration-reference/networking-settings.md) for node-to-node traffic. + +`repositories.url.allowed_urls` +: Enables explicit allowing of [read-only URL repositories](docs-content://deploy-manage/tools/snapshot-and-restore/read-only-url-repository.md). + +`reindex.remote.whitelist` +: Explicitly allows the set of hosts that can be [reindexed from remotely](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-reindex). Expects a YAML array of `host:port` strings. Consists of a comma-delimited list of `host:port` entries. Defaults to `["\*.io:*", "\*.com:*"]`. + +`reindex.ssl.*` +: To learn more on how to configure reindex SSL user settings, check [configuring reindex SSL parameters](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-reindex). + +`script.painless.regex.enabled` +: Enables [regular expressions](/reference/scripting-languages/painless/brief-painless-walkthrough.md#modules-scripting-painless-regex) for the Painless scripting language. + +`action.auto_create_index` +: [Automatically create index](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-create) if it doesn’t already exist. + +`action.destructive_requires_name` +: When set to `true`, users must [specify the index name](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-delete) to delete an index. It’s not possible to delete _all or use wildcards. + +`xpack.notification.webhook.additional_token_enabled` +: When set to `true`, {{es}} automatically sets a token which enables the bypassing of traffic filters for calls initiated by Watcher towards {{es}} or {{kib}}. The default is `false` and the feature is available starting with {{es}} version 8.7.1 and later. + + ::::{important} + This setting only applies to the Watcher `webhook` action, not the `http` input action. + :::: + + +`cluster.indices.close.enable` +: Enables closing indices in Elasticsearch. Defaults to `true` for versions 7.2.0 and later, and to `false` for previous versions. In versions 7.1 and below, closed indices represent a data loss risk: if you close an index, it is not included in snapshots and you will not be able to restore the data. Similarly, closed indices are not included when you make cluster configuration changes, such as scaling to a different capacity, failover, and many other operations. Lastly, closed indices can lead to inaccurate disk space counts. + + ::::{warning} + For versions 7.1 and below, closed indices represent a data loss risk. Enable this setting only temporarily for these versions. + :::: + + +`azure.client.CLIENT_NAME.endpoint_suffix` +: Allows providing the [endpoint_suffix client setting](docs-content://deploy-manage/tools/snapshot-and-restore/azure-repository.md#repository-azure-client-settings) for a non-internal Azure client used for snapshot/restore. Note that `CLIENT_NAME` should be replaced with the name of the created client. + + +### Circuit breaker settings [ec_circuit_breaker_settings] + +The following circuit breaker settings are supported: + +`indices.breaker.total.limit` +: Configures [the parent circuit breaker settings](/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md#parent-circuit-breaker). 
+ +`indices.breaker.fielddata.limit` +: Configures [the limit for the fielddata breaker](/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md#fielddata-circuit-breaker). + +`indices.breaker.fielddata.overhead` +: Configures [a constant that all field data estimations are multiplied with to determine a final estimation](/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md#fielddata-circuit-breaker). + +`indices.breaker.request.limit` +: Configures [the limit for the request breaker](/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md#request-circuit-breaker). + +`indices.breaker.request.overhead` +: Configures [a constant that all request estimations are multiplied by to determine a final estimation](/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md#request-circuit-breaker). + + +### Indexing pressure settings [ec_indexing_pressure_settings] + +The following indexing pressure settings are supported: + +`indexing_pressure.memory.limit` +: Configures [the indexing pressure settings](/reference/elasticsearch/index-settings/pressure.md). + + +### X-Pack [ec_x_pack] + +#### Version 8.5.3+, 7.x support in 7.17.8+ [ec_version_8_5_3_7_x_support_in_7_17_8] + +`xpack.security.transport.ssl.trust_restrictions.x509_fields` +: Specifies which field(s) from the TLS certificate is used to match for the restricted trust management that is used for remote clusters connections. This should only be set when a self managed cluster can not create certificates that follow the Elastic Cloud pattern. The default value is ["subjectAltName.otherName.commonName"], the Elastic Cloud pattern. "subjectAltName.dnsName" is also supported and can be configured in addition to or in replacement of the default. + + +#### All supported versions [ec_all_supported_versions] + +`xpack.ml.inference_model.time_to_live` +: Sets the duration of time that the trained models are cached. Check [{{ml-cap}} settings](/reference/elasticsearch/configuration-reference/machine-learning-settings.md). + +`xpack.security.loginAssistanceMessage` +: Adds a message to the login screen. Useful for displaying corporate messages. + +`xpack.security.authc.anonymous.*` +: To learn more on how to enable anonymous access, check [Enabling anonymous access](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/anonymous-access.md) + +`xpack.notification.slack` +: Configures [Slack notification settings](docs-content://explore-analyze/alerts-cases/watcher/actions-slack.md). Note that you need to add `secure_url` as a [secret value to the keystore](docs-content://deploy-manage/security/secure-settings.md). + +`xpack.notification.pagerduty` +: Configures [PagerDuty notification settings](docs-content://explore-analyze/alerts-cases/watcher/actions-pagerduty.md#configuring-pagerduty). + +`xpack.watcher.trigger.schedule.engine` +: Defines when the watch should start, based on date and time [Learn more](docs-content://explore-analyze/alerts-cases/watcher/schedule-types.md). + +`xpack.notification.email.html.sanitization.*` +: Enables [email notification settings](/reference/elasticsearch/configuration-reference/watcher-settings.md) to sanitize HTML elements in emails that are sent. + +`xpack.monitoring.collection.interval` +: Controls [how often data samples are collected](/reference/elasticsearch/configuration-reference/monitoring-settings.md#monitoring-collection-settings). 
+ +`xpack.monitoring.collection.min_interval_seconds` +: Specifies the minimum number of seconds that a time bucket in a chart can represent. If you modify the `xpack.monitoring.collection.interval`, use the same value in this setting. + + Defaults to `10` (10 seconds). + + +$$$xpack-monitoring-history-duration$$$`xpack.monitoring.history.duration` +: Sets the [retention duration](/reference/elasticsearch/configuration-reference/monitoring-settings.md#monitoring-collection-settings) beyond which the indices created by a monitoring exporter will be automatically deleted. + +`xpack.watcher.history.cleaner_service.enabled` +: Controls [whether old watcher indices are automatically deleted](/reference/elasticsearch/configuration-reference/watcher-settings.md#general-notification-settings). + +`xpack.http.ssl.cipher_suites` +: Controls the list of supported cipher suites for all outgoing TLS connections. + +`xpack.security.authc.realms.saml.*` +: To learn more on how to enable SAML and related user settings, check [secure your clusters with SAML](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/saml.md). + +`xpack.security.authc.realms.oidc.*` +: To learn more on how to enable OpenID Connect and related user settings, check [secure your clusters with OpenID Connect](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/openid-connect.md). + +`xpack.security.authc.realms.kerberos.*` +: To learn more on how to enable Kerberos and relate user settings, check [secure your clusters with Kerberos](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/kerberos.md). + +`xpack.security.authc.realms.jwt.*` +: To learn more on how to enable JWT and related user settings, check [secure your clusters with JWT](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/jwt.md). + +::::{note} +All SAML, OpenID Connect, Kerberos, and JWT settings are allowlisted. +:::: + + + + +### Search [ec_search] + +The following search settings are supported: + +* `search.aggs.rewrite_to_filter_by_filter` + + +### Disk-based shard allocation settings [shard-allocation-settings] + +The following disk-based allocation settings are supported: + +`cluster.routing.allocation.disk.threshold_enabled` +: Enable or disable [disk allocation](/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings.md) decider and defaults to `true`. + +`cluster.routing.allocation.disk.watermark.low` +: Configures [disk-based shard allocation’s low watermark](/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings.md). + +`cluster.routing.allocation.disk.watermark.high` +: Configures [disk-based shard allocation’s high watermark](/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings.md). + +`cluster.routing.allocation.disk.watermark.flood_stage` +: Configures [disk-based shard allocation’s flood_stage](/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings.md). + +::::{tip} +Remember to update user settings for alerts when performing a major version upgrade. +:::: + + + +### Enrich settings [ec_enrich_settings] + +The following enrich settings are supported: + +`enrich.cache_size` +: Maximum number of searches to cache for enriching documents. Defaults to 1000. There is a single cache for all enrich processors in the cluster. This setting determines the size of that cache. 
+ +`enrich.coordinator_proxy.max_concurrent_requests` +: Maximum number of concurrent multi-search requests to run when enriching documents. Defaults to 8. + +`enrich.coordinator_proxy.max_lookups_per_request` +: Maximum number of searches to include in a multi-search request when enriching documents. Defaults to 128. + +`enrich.coordinator_proxy.queue_capacity` +: coordinator queue capacity, defaults to max_concurrent_requests * max_lookups_per_request + + +### Audit settings [ec_audit_settings] + +The following audit settings are supported: + +`xpack.security.audit.enabled` +: Enables auditing on Elasticsearch cluster nodes. Defaults to *false*. + +`xpack.security.audit.logfile.events.include` +: Specifies which events to include in the auditing output. + +`xpack.security.audit.logfile.events.exclude` +: Specifies which events to exclude from the output. No events are excluded by default. + +`xpack.security.audit.logfile.events.emit_request_body` +: Specifies whether to include the request body from REST requests on certain event types, for example *authentication_failed*. Defaults to *false*. + +`xpack.security.audit.logfile.emit_node_name` +: Specifies whether to include the node name as a field in each audit event. Defaults to *true*. + +`xpack.security.audit.logfile.emit_node_host_address` +: Specifies whether to include the node’s IP address as a field in each audit event. Defaults to *false*. + +`xpack.security.audit.logfile.emit_node_host_name` +: Specifies whether to include the node’s host name as a field in each audit event. Defaults to *false*. + +`xpack.security.audit.logfile.emit_node_id` +: Specifies whether to include the node ID as a field in each audit event. Defaults to *true*. + +`xpack.security.audit.logfile.events.ignore_filters..users` +: A list of user names or wildcards. The specified policy will not print audit events for users matching these values. + +`xpack.security.audit.logfile.events.ignore_filters..realms` +: A list of authentication realm names or wildcards. The specified policy will not print audit events for users in these realms. + +`xpack.security.audit.logfile.events.ignore_filters..roles` +: A list of role names or wildcards. The specified policy will not print audit events for users that have these roles. + +`xpack.security.audit.logfile.events.ignore_filters..indices` +: A list of index names or wildcards. The specified policy will not print audit events when all the indices in the event match these values. + +`xpack.security.audit.logfile.events.ignore_filters..actions` +: A list of action names or wildcards. The specified policy will not print audit events for actions matching these values. + +::::{note} +To enable auditing you must first [enable deployment logging](docs-content://deploy-manage/monitor/stack-monitoring/elastic-cloud-stack-monitoring.md). +:::: + + + +### Universal Profiling settings [ec_universal_profiling_settings] + +The following settings for Elastic Universal Profiling are supported: + +`xpack.profiling.enabled` +: *Version 8.7.0+*: Specifies whether the Universal Profiling Elasticsearch plugin is enabled. Defaults to *true*. + +`xpack.profiling.templates.enabled` +: *Version 8.9.0+*: Specifies whether Universal Profiling related index templates should be created on startup. Defaults to *false*. 
diff --git a/docs/reference/elasticsearch/configuration-reference/elastic-cloud-serverless-elasticsearch-settings.md b/docs/reference/elasticsearch/configuration-reference/elastic-cloud-serverless-elasticsearch-settings.md new file mode 100644 index 0000000000000..5ec71a5621777 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/elastic-cloud-serverless-elasticsearch-settings.md @@ -0,0 +1,157 @@ +--- +navigation_title: "Serverless differences" +mapped_pages: + - https://www.elastic.co/guide/en/serverless/current/elasticsearch-differences.html +--- + +# Differences from other {{es}} offerings [elasticsearch-differences] + + +[{{es-serverless}}](docs-content://solutions/search.md) handles all the infrastructure management for you, providing a fully managed {{es}} service. + +If you’ve used {{es}} before, you’ll notice some differences in how you work with the service on {{serverless-full}}, because a number of APIs and settings are not required for serverless projects. + +This guide helps you understand what’s different, what’s available, and how to work effectively when running {{es}} on {{serverless-full}}. + + +## Fully managed infrastructure [elasticsearch-differences-serverless-infrastructure-management] + +{{es-serverless}} manages all infrastructure automatically, including: + +* Cluster scaling and optimization +* Node management and allocation +* Shard distribution and replication +* Resource utilization and monitoring + +This fully managed approach means many traditional {{es}} infrastructure APIs and settings are not available to end users, as detailed in the following sections. + + +## Index size guidelines [elasticsearch-differences-serverless-index-size] + +To ensure optimal performance, follow these recommendations for sizing individual indices on {{es-serverless}}: + +| Use case | Maximum index size | Project configuration | +| --- | --- | --- | +| Vector search | 150GB | Vector optimized | +| General search (non data-stream) | 300GB | General purpose | +| Other uses (non data-stream) | 600GB | General purpose | + +For large datasets that exceed the recommended maximum size for a single index, consider splitting your data across smaller indices and using an alias to search them collectively. + +These recommendations do not apply to indices using better binary quantization (BBQ). Refer to [vector quantization](/reference/elasticsearch/mapping-reference/dense-vector.md#dense-vector-quantization) in the core {{es}} docs for more information. + + +## API availability [elasticsearch-differences-serverless-apis-availability] + +Because {{es-serverless}} manages infrastructure automatically, certain APIs are not available, while others remain fully accessible. + +::::{tip} +Refer to the [{{es-serverless}} API reference](https://www.elastic.co/docs/api/doc/elasticsearch-serverless) for a complete list of available APIs. 
+ +:::: + + +The following categories of operations are unavailable: + +Infrastructure operations +: * All `_nodes/*` operations +* All `_cluster/*` operations +* Most `_cat/*` operations, except for index-related operations such as `/_cat/indices` and `/_cat/aliases` + + +Storage and backup +: * All `_snapshot/*` operations +* Repository management operations + + +Index management +: * `indices/close` operations +* `indices/open` operations +* Recovery and stats operations +* Force merge operations + + +When attempting to use an unavailable API, you’ll receive a clear error message: + +```json +{ + "error": { + "root_cause": [ + { + "type": "api_not_available_exception", + "reason": "Request for uri [/] with method [] exists but is not available when running in serverless mode" + } + ], + "status": 410 + } +} +``` + + +## Settings availability [elasticsearch-differences-serverless-settings-availability] + +In {{es-serverless}}, you can only configure [index-level settings](/reference/elasticsearch/index-settings/index.md). Cluster-level settings and node-level settings are not required by end users and the `elasticsearch.yml` file is fully managed by Elastic. + +Available settings +: **Index-level settings**: Settings that control how {{es}} documents are processed, stored, and searched are available to end users. These include: + + * Analysis configuration + * Mapping parameters + * Search/query settings + * Indexing settings such as `refresh_interval` + + +Managed settings +: **Infrastructure-related settings**: Settings that affect cluster resources or data distribution are not available to end users. These include: + + * Node configurations + * Cluster topology + * Shard allocation + * Resource management + + + +## Feature availability [elasticsearch-differences-serverless-feature-categories] + +Some features that are available in Elastic Cloud Hosted and self-managed offerings are not available in {{es-serverless}}. These features have either been replaced by a new feature, are planned to be released in future, or are not applicable in the new serverless architecture. + + +### Replaced features [elasticsearch-differences-serverless-features-replaced] + +These features have been replaced by a new feature and are therefore not available on {{es-serverless}}: + +* **Index lifecycle management ({{ilm-init}})** is not available, in favor of [**data stream lifecycle**](docs-content://manage-data/data-store/index-basics.md). + + In an Elastic Cloud Hosted or self-managed environment, {{ilm-init}} lets you automatically transition indices through data tiers according to your performance needs and retention requirements. This allows you to balance hardware costs with performance. {{es-serverless}} eliminates this complexity by optimizing your cluster performance for you. + + Data stream lifecycle is an optimized lifecycle tool that lets you focus on the most common lifecycle management needs, without unnecessary hardware-centric concepts like data tiers. + +* **Watcher** is not available, in favor of [**Alerts**](docs-content://explore-analyze/alerts-cases/alerts.md#rules-alerts). + + Kibana Alerts allows rich integrations across use cases like APM, metrics, security, and uptime. Prepackaged rule types simplify setup and hide the details of complex, domain-specific detections, while providing a consistent interface across Kibana. 
+ + + +### Planned features [elasticsearch-differences-serverless-feature-planned] + +The following features are planned for future support in all {{serverless-full}} projects: + +* Reindexing from remote clusters +* Cross-project search and replication +* Snapshot and restore +* Migrations from non-serverless deployments +* Audit logging +* Clone index API +* Traffic filtering and VPCs + + + +### Unplanned features [elasticsearch-differences-serverless-feature-unavailable] + +The following features are not available in {{es-serverless}} and are not planned for future support: + +* [Custom plugins and bundles](docs-content://deploy-manage/deploy/elastic-cloud/upload-custom-plugins-bundles.md) +* [{{es}} for Apache Hadoop](elasticsearch-hadoop://docs/reference/elasticsearch-for-apache-hadoop.md) +* [Scripted metric aggregations](/reference/data-analysis/aggregations/search-aggregations-metrics-scripted-metric-aggregation.md) +* Managed web crawler: You can use the [self-managed web crawler](https://github.com/elastic/crawler) instead. +* Managed Search connectors: You can use [self-managed Search connectors](/reference/ingestion-tools/search-connectors/self-managed-connectors.md) instead. diff --git a/docs/reference/elasticsearch/configuration-reference/field-data-cache-settings.md b/docs/reference/elasticsearch/configuration-reference/field-data-cache-settings.md new file mode 100644 index 0000000000000..d196f8e3dbb51 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/field-data-cache-settings.md @@ -0,0 +1,23 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-fielddata.html +--- + +# Field data cache settings [modules-fielddata] + +The field data cache contains [field data](/reference/elasticsearch/mapping-reference/text.md#fielddata-mapping-param) and [global ordinals](/reference/elasticsearch/mapping-reference/eager-global-ordinals.md), which are both used to support aggregations on certain field types. Since these are on-heap data structures, it is important to monitor the cache’s use. + +The entries in the cache are expensive to build, so the default behavior is to keep the cache loaded in memory. The default cache size is unlimited, causing the cache to grow until it reaches the limit set by the [field data circuit breaker](/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md#fielddata-circuit-breaker). This behavior can be configured. + +If the cache size limit is set, the cache will begin clearing the least-recently-updated entries in the cache. This setting can automatically avoid the circuit breaker limit, at the cost of rebuilding the cache as needed. + +If the circuit breaker limit is reached, further requests that increase the cache size will be prevented. In this case you should manually [clear the cache](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-clear-cache). + +::::{tip} +You can monitor memory usage for field data as well as the field data circuit breaker using the [nodes stats API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-nodes-stats) or the [cat fielddata API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cat-fielddata). +:::: + + +`indices.fielddata.cache.size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The max size of the field data cache, eg `38%` of node heap space, or an absolute value, eg `12GB`. 
Defaults to unbounded. If you choose to set it, it should be smaller than [Field data circuit breaker](/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md#fielddata-circuit-breaker) limit. + diff --git a/docs/reference/elasticsearch/configuration-reference/health-diagnostic-settings.md b/docs/reference/elasticsearch/configuration-reference/health-diagnostic-settings.md new file mode 100644 index 0000000000000..131c8e0e544f3 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/health-diagnostic-settings.md @@ -0,0 +1,47 @@ +--- +navigation_title: "Health Diagnostic settings" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/health-diagnostic-settings.html +--- + +# Health diagnostic settings in {{es}} [health-diagnostic-settings] + + +The following are the *expert-level* settings available for configuring an internal diagnostics service. The output of this service is currently exposed through the Health API [Health API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-health-report). It is not recommended to change any of these from their default values. + +## Cluster level settings [_cluster_level_settings_2] + +`health.master_history.has_master_lookup_timeframe` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The amount of time a node looks back to see if it has observed a master at all, before moving on with other checks. Defaults to `30s` (30 seconds). + +`master_history.max_age` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The timeframe we record the master history to be used for diagnosing the cluster health. Master node changes older than this time will not be considered when diagnosing the cluster health. Defaults to `30m` (30 minutes). + +`health.master_history.identity_changes_threshold` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The number of master identity changes witnessed by a node that indicates the cluster is not healthy. Defaults to `4`. + +`health.master_history.no_master_transitions_threshold` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The number of transitions to no master witnessed by a node that indicates the cluster is not healthy. Defaults to `4`. + +`health.node.enabled` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Enables the health node, which allows the health API to provide indications about cluster wide health aspects such as disk space. + +`health.reporting.local.monitor.interval` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Determines the interval in which each node of the cluster monitors aspects that comprise its local health such as its disk usage. + +`health.ilm.max_time_on_action` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The minimum amount of time an index has to be in an {{ilm}} ({{ilm-init}}) action before it is considered stagnant. Defaults to `1d` (1 day). 
+ +`health.ilm.max_time_on_step` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The minimum amount of time an index has to be in an {{ilm-init}} step before it is considered stagnant. Defaults to `1d` (1 day). + +`health.ilm.max_retries_per_step` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The minimum number of times an index must have been retried by an {{ilm-init}} step before it is considered stagnant. Defaults to `100`. + +`health.periodic_logger.enabled` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Enables the health periodic logger, which logs the health status of each health indicator, along with the top-level status, as observed by the Health API. Defaults to `false`. + +`health.periodic_logger.poll_interval` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings), [time unit value](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) How often {{es}} logs the health status of the cluster and of each health indicator as observed by the Health API. Defaults to `60s` (60 seconds). + + diff --git a/docs/reference/elasticsearch/configuration-reference/index-lifecycle-management-settings.md b/docs/reference/elasticsearch/configuration-reference/index-lifecycle-management-settings.md new file mode 100644 index 0000000000000..1085f81421355 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/index-lifecycle-management-settings.md @@ -0,0 +1,63 @@ +--- +navigation_title: "{{ilm-cap}} settings" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/ilm-settings.html +--- + +# {{ilm-cap}} settings in {{es}} [ilm-settings] + + +These are the settings available for configuring [{{ilm}}](docs-content://manage-data/lifecycle/index-lifecycle-management.md) ({{ilm-init}}). + +## Cluster level settings [_cluster_level_settings_3] + +`xpack.ilm.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), Boolean) [7.8.0]
This deprecated setting has no effect and will be removed in Elasticsearch 8.0. + +`indices.lifecycle.history_index_enabled` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), Boolean) Whether ILM’s history index is enabled. If enabled, ILM will record the history of actions taken as part of ILM policies to the `ilm-history-*` indices. Defaults to `true`. + +$$$indices-lifecycle-poll-interval$$$ + +`indices.lifecycle.poll_interval` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), [time unit value](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) How often {{ilm}} checks for indices that meet policy criteria. Defaults to `10m`. + +$$$indices-lifecycle-rollover-only-if-has-documents$$$ + +`indices.lifecycle.rollover.only_if_has_documents` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), Boolean) Whether ILM will only roll over non-empty indices. If enabled, ILM will only roll over indices as long as they contain at least one document. Defaults to `true`. + + +## Index level settings [_index_level_settings_2] + +These index-level {{ilm-init}} settings are typically configured through index templates. For more information, see [Create a lifecycle policy](docs-content://manage-data/lifecycle/index-lifecycle-management/tutorial-automate-rollover.md#ilm-gs-create-policy). + +`index.lifecycle.indexing_complete` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-settings), Boolean) Indicates whether or not the index has been rolled over. Automatically set to `true` when {{ilm-init}} completes the rollover action. You can explicitly set it to [skip rollover](docs-content://manage-data/lifecycle/index-lifecycle-management/skip-rollover.md). Defaults to `false`. + +$$$index-lifecycle-name$$$ + +`index.lifecycle.name` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-settings), string) The name of the policy to use to manage the index. For information about how {{es}} applies policy changes, see [Policy updates](docs-content://manage-data/lifecycle/index-lifecycle-management/policy-updates.md). If you are restoring an index from snapshot that was previously managed by {{ilm}}, you can override this setting to null during the restore operation to disable further management of the index. See also [Index level settings](#index-lifecycle-rollover-alias). + +$$$index-lifecycle-origination-date$$$ + +`index.lifecycle.origination_date` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-settings), long) If specified, this is the timestamp used to calculate the index age for its phase transitions. Use this setting if you create a new index that contains old data and want to use the original creation date to calculate the index age. Specified as a Unix epoch value in milliseconds. + +$$$index-lifecycle-parse-origination-date$$$ + +`index.lifecycle.parse_origination_date` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-settings), Boolean) Set to `true` to parse the origination date from the index name. This origination date is used to calculate the index age for its phase transitions. 
The index name must match the pattern `^.*-{{date_format}}-\\d+`, where the `date_format` is `yyyy.MM.dd` and the trailing digits are optional. An index that was rolled over would normally match the full format, for example `logs-2016.10.31-000002`). If the index name doesn’t match the pattern, index creation fails. + +$$$index-lifecycle-step-wait-time-threshold$$$ + +`index.lifecycle.step.wait_time_threshold` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-settings), [time value](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) Time to wait for the cluster to resolve allocation issues during an {{ilm-init}} [`shrink`](/reference/elasticsearch/index-lifecycle-actions/ilm-shrink.md) action. Must be greater than `1h` (1 hour). Defaults to `12h` (12 hours). See [Shard allocation for shrink](/reference/elasticsearch/index-lifecycle-actions/ilm-shrink.md#ilm-shrink-shard-allocation). + +$$$index-lifecycle-rollover-alias$$$ + +`index.lifecycle.rollover_alias` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-settings), string) The index alias to update when the index rolls over. Specify when using a policy that contains a rollover action. When the index rolls over, the alias is updated to reflect that the index is no longer the write index. For more information about rolling indices, see [Rollover](docs-content://manage-data/lifecycle/index-lifecycle-management/rollover.md). If you are restoring an index from snapshot that was previously managed by {{ilm}}, you can override this setting to null during the restore operation to disable further management of future indices. See also [Index level settings](#index-lifecycle-name). + + diff --git a/docs/reference/elasticsearch/configuration-reference/index-management-settings.md b/docs/reference/elasticsearch/configuration-reference/index-management-settings.md new file mode 100644 index 0000000000000..e96b224ee1bd8 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/index-management-settings.md @@ -0,0 +1,116 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/index-management-settings.html +--- + +# Index management settings [index-management-settings] + +You can use the following cluster settings to enable or disable index management features. + +$$$auto-create-index$$$ + +`action.auto_create_index` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) [Automatically create an index](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-create) if it doesn’t already exist and apply any configured index templates. Defaults to `true`. + +$$$action-destructive-requires-name$$$ + +`action.destructive_requires_name` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) When set to `true`, you must specify the index name to [delete an index](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-delete). It is not possible to delete all indices with `_all` or use wildcards. Defaults to `true`. 
+ +$$$cluster-indices-close-enable$$$ + +`cluster.indices.close.enable` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Enables [closing of open indices](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-close) in {{es}}. If `false`, you cannot close open indices. Defaults to `true`. + + ::::{note} + Closed indices still consume a significant amount of disk space. + :::: + + +$$$stack-templates-enabled$$$ + +`stack.templates.enabled` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) If `true`, enables built-in index and component templates. [{{agent}}](docs-content://reference/ingestion-tools/fleet/index.md) uses these templates to create data streams. If `false`, {{es}} disables these index and component templates. Defaults to `true`. + +::::{note} +It is not recommended to disable the built-in stack templates, as some functionality of {{es}} or Kibana will not work correctly when disabled. Features like log and metric collection, as well as Kibana reporting, may malfunction without the built-in stack templates. Stack templates should only be disabled temporarily, if necessary, to resolve upgrade issues, then re-enabled after any issues have been resolved. +:::: + + +This setting affects the following built-in index templates: + +* `.kibana-reporting*` +* `logs-*-*` +* `metrics-*-*` +* `synthetics-*-*` +* `profiling-*` +* `security_solution-*-*` + +This setting also affects the following built-in component templates: + +* `kibana-reporting@settings` +* `logs@mappings` +* `logs@settings` +* `metrics@mappings` +* `metrics@settings` +* `metrics@tsdb-settings` +* `synthetics@mapping` +* `synthetics@settings` + + + +## Reindex settings [reindex-settings] + +$$$reindex-remote-whitelist$$$ + +`reindex.remote.whitelist` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the hosts that can be [reindexed from remotely](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-reindex). Expects a YAML array of `host:port` strings. Consists of a comma-delimited list of `host:port` entries. Defaults to `["\*.io:*", "\*.com:*"]`. + +`reindex.ssl.certificate` +: Specifies the path to the PEM encoded certificate (or certificate chain) to be used for HTTP client authentication (if required by the remote cluster) This setting requires that `reindex.ssl.key` also be set. You cannot specify both `reindex.ssl.certificate` and `reindex.ssl.keystore.path`. + +`reindex.ssl.certificate_authorities` +: List of paths to PEM encoded certificate files that should be trusted. You cannot specify both `reindex.ssl.certificate_authorities` and `reindex.ssl.truststore.path`. + +`reindex.ssl.key` +: Specifies the path to the PEM encoded private key associated with the certificate used for client authentication (`reindex.ssl.certificate`). You cannot specify both `reindex.ssl.key` and `reindex.ssl.keystore.path`. + +`reindex.ssl.key_passphrase` +: Specifies the passphrase to decrypt the PEM encoded private key (`reindex.ssl.key`) if it is encrypted. [7.17.0] Prefer `reindex.ssl.secure_key_passphrase` instead. Cannot be used with `reindex.ssl.secure_key_passphrase`. 
+ +`reindex.ssl.keystore.key_password` +: The password for the key in the keystore (`reindex.ssl.keystore.path`). Defaults to the keystore password. [7.17.0] Prefer `reindex.ssl.keystore.secure_key_password` instead. This setting cannot be used with `reindex.ssl.keystore.secure_key_password`. + +`reindex.ssl.keystore.password` +: The password to the keystore (`reindex.ssl.keystore.path`). [7.17.0] Prefer `reindex.ssl.keystore.secure_password` instead. This setting cannot be used with `reindex.ssl.keystore.secure_password`. + +`reindex.ssl.keystore.path` +: Specifies the path to the keystore that contains a private key and certificate to be used for HTTP client authentication (if required by the remote cluster). This keystore can be in "JKS" or "PKCS#12" format. You cannot specify both `reindex.ssl.key` and `reindex.ssl.keystore.path`. + +`reindex.ssl.keystore.type` +: The type of the keystore (`reindex.ssl.keystore.path`). Must be either `jks` or `PKCS12`. If the keystore path ends in ".p12", ".pfx" or "pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. + +`reindex.ssl.secure_key_passphrase` ([Secure](docs-content://deploy-manage/security/secure-settings.md)) +: Specifies the passphrase to decrypt the PEM encoded private key (`reindex.ssl.key`) if it is encrypted. Cannot be used with `reindex.ssl.key_passphrase`. + +`reindex.ssl.keystore.secure_key_password` ([Secure](docs-content://deploy-manage/security/secure-settings.md)) +: The password for the key in the keystore (`reindex.ssl.keystore.path`). Defaults to the keystore password. This setting cannot be used with `reindex.ssl.keystore.key_password`. + +`reindex.ssl.keystore.secure_password` ([Secure](docs-content://deploy-manage/security/secure-settings.md)) +: The password to the keystore (`reindex.ssl.keystore.path`). This setting cannot be used with `reindex.ssl.keystore.password`. + +`reindex.ssl.truststore.password` +: The password to the truststore (`reindex.ssl.truststore.path`). [7.17.0] Prefer `reindex.ssl.truststore.secure_password` instead. This setting cannot be used with `reindex.ssl.truststore.secure_password`. + +`reindex.ssl.truststore.path` +: The path to the Java Keystore file that contains the certificates to trust. This keystore can be in "JKS" or "PKCS#12" format. You cannot specify both `reindex.ssl.certificate_authorities` and `reindex.ssl.truststore.path`. + +`reindex.ssl.truststore.secure_password` ([Secure](docs-content://deploy-manage/security/secure-settings.md)) +: The password to the truststore (`reindex.ssl.truststore.path`). This setting cannot be used with `reindex.ssl.truststore.password`. + +`reindex.ssl.truststore.type` +: The type of the truststore (`reindex.ssl.truststore.path`). Must be either `jks` or `PKCS12`. If the truststore path ends in ".p12", ".pfx" or "pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. + +`reindex.ssl.verification_mode` +: Indicates the type of verification to protect against man in the middle attacks and certificate forgery. One of `full` (verify the hostname and the certificate path), `certificate` (verify the certificate path, but not the hostname) or `none` (perform no verification - this is strongly discouraged in production environments). Defaults to `full`. 
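As an illustration of how these settings fit together, the following `elasticsearch.yml` sketch permits reindexing from a single remote cluster over TLS. The hostname and file path are placeholders rather than recommendations, and only settings described above are used:

```yaml
# Hosts the reindex API is allowed to pull data from (static setting).
reindex.remote.whitelist: ["remote-cluster.example.com:9200"]

# Trust the CA that signed the remote cluster's HTTP certificate.
reindex.ssl.certificate_authorities: ["/path/to/remote-http-ca.crt"]

# Verify both the certificate chain and the hostname (the default mode).
reindex.ssl.verification_mode: full
```

Because `reindex.remote.whitelist` is a static setting, the node must be restarted for a change to this configuration to take effect.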
+ diff --git a/docs/reference/elasticsearch/configuration-reference/index-recovery-settings.md b/docs/reference/elasticsearch/configuration-reference/index-recovery-settings.md new file mode 100644 index 0000000000000..98513f7efc18a --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/index-recovery-settings.md @@ -0,0 +1,122 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/recovery.html +--- + +# Index recovery settings [recovery] + +Peer recovery syncs data from a primary shard to a new or existing shard copy. + +Peer recovery automatically occurs when {{es}}: + +* Recreates a shard lost during node failure +* Relocates a shard to another node due to a cluster rebalance or changes to the [shard allocation settings](/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings.md) + +You can view a list of in-progress and completed recoveries using the [cat recovery API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cat-recovery). + + +## Recovery settings [recovery-settings] + +`indices.recovery.max_bytes_per_sec` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Limits total inbound and outbound recovery traffic for each node. Applies to both peer recoveries as well as snapshot recoveries (i.e., restores from a snapshot). Defaults to `40mb` unless the node is a dedicated [cold](docs-content://manage-data/lifecycle/data-tiers.md#cold-tier) or [frozen](docs-content://manage-data/lifecycle/data-tiers.md#frozen-tier) node, in which case the default relates to the total memory available to the node: + + | Total memory | Default recovery rate on cold and frozen nodes | + | --- | --- | + | ≤ 4 GB | 40 MB/s | + | > 4 GB and ≤ 8 GB | 60 MB/s | + | > 8 GB and ≤ 16 GB | 90 MB/s | + | > 16 GB and ≤ 32 GB | 125 MB/s | + | > 32 GB | 250 MB/s | + + This limit applies to each node separately. If multiple nodes in a cluster perform recoveries at the same time, the cluster’s total recovery traffic may exceed this limit. + + If this limit is too high, ongoing recoveries may consume an excess of bandwidth and other resources, which can have a performance impact on your cluster and in extreme cases may destabilize it. + + This is a dynamic setting, which means you can set it in each node’s `elasticsearch.yml` config file and you can update it dynamically using the [cluster update settings API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings). If you set it dynamically then the same limit applies on every node in the cluster. If you do not set it dynamically then you can set a different limit on each node, which is useful if some of your nodes have better bandwidth than others. For example, if you are using [Index Lifecycle Management](docs-content://manage-data/lifecycle/index-lifecycle-management.md) then you may be able to give your hot nodes a higher recovery bandwidth limit than your warm nodes. + + + +## Expert peer recovery settings [_expert_peer_recovery_settings] + +You can use the following *expert* setting to manage resources for peer recoveries. + +`indices.recovery.max_concurrent_file_chunks` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings), Expert) Number of file chunks sent in parallel for each recovery. Defaults to `2`. 
+ + You can increase the value of this setting when the recovery of a single shard is not reaching the traffic limit set by `indices.recovery.max_bytes_per_sec`, up to a maximum of `8`. + + +`indices.recovery.max_concurrent_operations` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings), Expert) Number of operations sent in parallel for each recovery. Defaults to `1`. + + Concurrently replaying operations during recovery can be very resource-intensive and may interfere with indexing, search, and other activities in your cluster. Do not increase this setting without carefully verifying that your cluster has the resources available to handle the extra load that will result. + + +`indices.recovery.use_snapshots` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings), Expert) Enables snapshot-based peer recoveries. + + {{es}} recovers replicas and relocates primary shards using the *peer recovery* process, which involves constructing a new copy of a shard on the target node. When `indices.recovery.use_snapshots` is `false` {{es}} will construct this new copy by transferring the index data from the current primary. When this setting is `true` {{es}} will attempt to copy the index data from a recent snapshot first, and will only copy data from the primary if it cannot identify a suitable snapshot. Defaults to `true`. + + Setting this option to `true` reduces your operating costs if your cluster runs in an environment where the node-to-node data transfer costs are higher than the costs of recovering data from a snapshot. It also reduces the amount of work that the primary must do during a recovery. + + Additionally, repositories having the setting `use_for_peer_recovery=true` will be consulted to find a good snapshot when recovering a shard. If none of the registered repositories have this setting defined, index files will be recovered from the source node. + + +`indices.recovery.max_concurrent_snapshot_file_downloads` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings), Expert) Number of snapshot file downloads requests sent in parallel to the target node for each recovery. Defaults to `5`. + + Do not increase this setting without carefully verifying that your cluster has the resources available to handle the extra load that will result. + + +`indices.recovery.max_concurrent_snapshot_file_downloads_per_node` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings), Expert) Number of snapshot file downloads requests executed in parallel in the target node for all recoveries. Defaults to `25`. + + Do not increase this setting without carefully verifying that your cluster has the resources available to handle the extra load that will result. + + + +## Recovery settings for managed services [recovery-settings-for-managed-services] + +::::{note} +{cloud-only} +:::: + + +When running {{es}} as a managed service, the following settings allow the service to specify absolute maximum bandwidths for disk reads, disk writes, and network traffic on each node, and permit you to control the maximum recovery bandwidth on each node in terms of these absolute maximum values. They have two effects: + +1. They determine the bandwidth used for recovery if `indices.recovery.max_bytes_per_sec` is not set, overriding the default behaviour described above. +2. 
They impose a node-wide limit on recovery bandwidth which is independent of the value of `indices.recovery.max_bytes_per_sec`. + +If you do not set `indices.recovery.max_bytes_per_sec` then the maximum recovery bandwidth is computed as a proportion of the absolute maximum bandwidth. The computation is performed separately for read and write traffic. The service defines the absolute maximum bandwidths for disk reads, disk writes, and network transfers using `node.bandwidth.recovery.disk.read`, `node.bandwidth.recovery.disk.write` and `node.bandwidth.recovery.network` respectively, and you can set the proportion of the absolute maximum bandwidth that may be used for recoveries by adjusting `node.bandwidth.recovery.factor.read` and `node.bandwidth.recovery.factor.write`. If the {{operator-feature}} is enabled then the service may also set default proportions using operator-only variants of these settings. + +If you set `indices.recovery.max_bytes_per_sec` then {{es}} will use its value for the maximum recovery bandwidth, as long as this does not exceed the node-wide limit. {{es}} computes the node-wide limit by multiplying the absolute maximum bandwidths by the `node.bandwidth.recovery.operator.factor.max_overcommit` factor. If you set `indices.recovery.max_bytes_per_sec` in excess of the node-wide limit then the node-wide limit takes precedence. + +The service should determine values for the absolute maximum bandwidths settings by experiment, using a recovery-like workload in which there are several concurrent workers each processing files sequentially in chunks of 512kiB. + +`node.bandwidth.recovery.disk.read` +: ([byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units) per second) The absolute maximum disk read speed for a recovery-like workload on the node. If set, `node.bandwidth.recovery.disk.write` and `node.bandwidth.recovery.network` must also be set. + +`node.bandwidth.recovery.disk.write` +: ([byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units) per second) The absolute maximum disk write speed for a recovery-like workload on the node. If set, `node.bandwidth.recovery.disk.read` and `node.bandwidth.recovery.network` must also be set. + +`node.bandwidth.recovery.network` +: ([byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units) per second) The absolute maximum network throughput for a recovery-like workload on the node, which applies to both reads and writes. If set, `node.bandwidth.recovery.disk.read` and `node.bandwidth.recovery.disk.write` must also be set. + +`node.bandwidth.recovery.factor.read` +: (float, [dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The proportion of the maximum read bandwidth that may be used for recoveries if `indices.recovery.max_bytes_per_sec` is not set. Must be greater than `0` and not greater than `1`. If not set, the value of `node.bandwidth.recovery.operator.factor.read` is used. If no factor settings are set then the value `0.4` is used. + +`node.bandwidth.recovery.factor.write` +: (float, [dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The proportion of the maximum write bandwidth that may be used for recoveries if `indices.recovery.max_bytes_per_sec` is not set. Must be greater than `0` and not greater than `1`. If not set, the value of `node.bandwidth.recovery.operator.factor.write` is used. 
If no factor settings are set then the value `0.4` is used. + +`node.bandwidth.recovery.operator.factor.read` +: (float, [dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The proportion of the maximum read bandwidth that may be used for recoveries if `indices.recovery.max_bytes_per_sec` and `node.bandwidth.recovery.factor.read` are not set. Must be greater than `0` and not greater than `1`. If not set, the value of `node.bandwidth.recovery.operator.factor` is used. If no factor settings are set then the value `0.4` is used. When the {{operator-feature}} is enabled, this setting can be updated only by operator users. + +`node.bandwidth.recovery.operator.factor.write` +: (float, [dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The proportion of the maximum write bandwidth that may be used for recoveries if `indices.recovery.max_bytes_per_sec` and `node.bandwidth.recovery.factor.write` are not set. Must be greater than `0` and not greater than `1`. If not set, the value of `node.bandwidth.recovery.operator.factor` is used. If no factor settings are set then the value `0.4` is used. When the {{operator-feature}} is enabled, this setting can be updated only by operator users. + +`node.bandwidth.recovery.operator.factor` +: (float, [dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The proportion of the maximum bandwidth that may be used for recoveries if neither `indices.recovery.max_bytes_per_sec` nor any other factor settings are set. Must be greater than `0` and not greater than `1`. Defaults to `0.4`. When the {{operator-feature}} is enabled, this setting can be updated only by operator users. + +`node.bandwidth.recovery.operator.factor.max_overcommit` +: (float, [dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The proportion of the absolute maximum bandwidth that may be used for recoveries regardless of any other settings. Must be greater than `0`. Defaults to `100`. When the {{operator-feature}} is enabled, this setting can be updated only by operator users. + diff --git a/docs/reference/elasticsearch/configuration-reference/index.md b/docs/reference/elasticsearch/configuration-reference/index.md new file mode 100644 index 0000000000000..64f419c9ad97f --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/index.md @@ -0,0 +1,40 @@ +--- +navigation_title: "Configuration" +--- + +# Elasticsearch configuration reference + +Configuration settings enable you to customize the behavior of Elasticsearch features. +This reference provides details about each setting, such as its purpose, default behavior, and availability in Elastic Cloud environments. 
+ +% TO-DO: For information about how to update these settings, refer to "Configure Elasticsearch" (link to appropriate deployment pages) % + +The settings are grouped by feature or purpose, for example: + +- [Auditing](/reference/elasticsearch/configuration-reference/auding-settings.md) +- [Circuit breaker](/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md) +- [Cluster formation and discovery](/reference/elasticsearch/configuration-reference/discovery-cluster-formation-settings.md) +- [Cross-cluster replication](/reference/elasticsearch/configuration-reference/cross-cluster-replication-settings.md) +- [Data stream lifecycle](/reference/elasticsearch/configuration-reference/data-stream-lifecycle-settings.md) +- [Field data cache](/reference/elasticsearch/configuration-reference/field-data-cache-settings.md) +- [Health diagnostic](/reference/elasticsearch/configuration-reference/health-diagnostic-settings.md) +- [Index lifecycle management](/reference/elasticsearch/configuration-reference/index-lifecycle-management-settings.md), +- [Index management](/reference/elasticsearch/configuration-reference/index-management-settings.md) +- [Index recovery](/reference/elasticsearch/configuration-reference/index-recovery-settings.md) +- [Index buffer](/reference/elasticsearch/configuration-reference/indexing-buffer-settings.md) +- [Inference](/reference/elasticsearch/configuration-reference/inference-settings.md) +- [License](/reference/elasticsearch/configuration-reference/license-settings.md) +- [Local gateway](/reference/elasticsearch/configuration-reference/local-gateway.md) +- [Machine learning](/reference/elasticsearch/configuration-reference/machine-learning-settings.md) +- [Monitoring](/reference/elasticsearch/configuration-reference/monitoring-settings.md) +- [Networking](/reference/elasticsearch/configuration-reference/networking-settings.md) +- [Nodes](/reference/elasticsearch/configuration-reference/node-settings.md) +- [Node query cache](/reference/elasticsearch/configuration-reference/node-query-cache-settings.md) +- [Search](/reference/elasticsearch/configuration-reference/search-settings.md) +- [Security](/reference/elasticsearch/configuration-reference/security-settings.md) +- [Shard request cache](/reference/elasticsearch/configuration-reference/shard-request-cache-settings.md) +- [Shard routing](/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings.md) +- [Snapshot and restore](/reference/elasticsearch/configuration-reference/snapshot-restore-settings.md) +- [Transforms](/reference/elasticsearch/configuration-reference/transforms-settings.md) +- [Thread pools](/reference/elasticsearch/configuration-reference/thread-pool-settings.md) +- [Watcher](/reference/elasticsearch/configuration-reference/watcher-settings.md) \ No newline at end of file diff --git a/docs/reference/elasticsearch/configuration-reference/indexing-buffer-settings.md b/docs/reference/elasticsearch/configuration-reference/indexing-buffer-settings.md new file mode 100644 index 0000000000000..8e68a30243425 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/indexing-buffer-settings.md @@ -0,0 +1,20 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/indexing-buffer.html +--- + +# Indexing buffer settings [indexing-buffer] + +The indexing buffer is used to store newly indexed documents. When it fills up, the documents in the buffer are written to a segment on disk. 
It is divided between all shards on the node. + +The following settings are *static* and must be configured on every data node in the cluster: + +`indices.memory.index_buffer_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Accepts either a percentage or a byte size value. It defaults to `10%`, meaning that `10%` of the total heap allocated to a node will be used as the indexing buffer size shared across all shards. + +`indices.memory.min_index_buffer_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) If the `index_buffer_size` is specified as a percentage, then this setting can be used to specify an absolute minimum. Defaults to `48mb`. + +`indices.memory.max_index_buffer_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) If the `index_buffer_size` is specified as a percentage, then this setting can be used to specify an absolute maximum. Defaults to unbounded. + diff --git a/docs/reference/elasticsearch/configuration-reference/inference-settings.md b/docs/reference/elasticsearch/configuration-reference/inference-settings.md new file mode 100644 index 0000000000000..7e1b92c4f284a --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/inference-settings.md @@ -0,0 +1,66 @@ +--- +navigation_title: "Inference settings" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-settings.html +--- + +# Inference API settings in {{es}} [inference-settings] + + +$$$inference-settings-description$$$ +You do not need to configure any settings to use the {{infer}} APIs. Each setting has a default. + + +### Inference API logging settings [xpack-inference-logging] + +When certain failures occur, a log message is emitted. In the case of a reoccurring failure the logging throttler restricts repeated messages from being logged. + +`xpack.inference.logging.reset_interval` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Specifies the interval for when a cleanup thread will clear an internal cache of the previously logged messages. Defaults to one day (`1d`). + +`xpack.inference.logging.wait_duration` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Specifies the amount of time to wait after logging a message before that message can be logged again. Defaults to one hour (`1h`). + +## {{infer-cap}} API HTTP settings [xpack-inference-http-settings] + +`xpack.inference.http.max_response_size` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Specifies the maximum size in bytes an HTTP response is allowed to have, defaults to `50mb`, the maximum configurable value is `100mb`. + +`xpack.inference.http.max_total_connections` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Specifies the maximum number of connections the internal connection pool can lease. Defaults to `50`. + +`xpack.inference.http.max_route_connections` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Specifies the maximum number of connections a single route can lease from the internal connection pool. 
If this setting is set to a value equal to or greater than `xpack.inference.http.max_total_connections`, then a single third-party service could lease all available connections and other third-party services would be unable to lease connections. Defaults to `20`. + +`xpack.inference.http.connection_eviction_interval` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Specifies the interval at which an eviction thread runs to remove expired and stale connections from the internal connection pool. Decreasing this time value can help improve throughput if multiple third-party services are contending for the available connections in the pool. Defaults to one minute (`1m`). + +`xpack.inference.http.connection_eviction_max_idle_time` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Specifies the maximum duration a connection can be unused before it is marked as idle and can be closed and removed from the shared connection pool. Defaults to one minute (`1m`). + +`xpack.inference.http.request_executor.queue_capacity` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Specifies the size of the internal queue for requests waiting to be sent. If the queue is full and a request is sent to the {{infer}} API, it will be rejected. Defaults to `2000`. + + +## {{infer-cap}} API HTTP Retry settings [xpack-inference-http-retry-settings] + +When a third-party service returns a transient failure code (for example, 429), the request is retried by the {{infer}} API. These settings govern the retry behavior. When a request is retried, exponential backoff is used. + +`xpack.inference.http.retry.initial_delay` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Specifies the initial delay before retrying a request. Defaults to one second (`1s`). + +`xpack.inference.http.retry.max_delay_bound` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Specifies the maximum retry delay for a request. Defaults to five seconds (`5s`). + +`xpack.inference.http.retry.timeout` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Specifies the maximum amount of time a request can be retried. Once the request exceeds this time, the request will no longer be retried and a failure will be returned. Defaults to 30 seconds (`30s`). + + +## {{infer-cap}} API Input text [xpack-inference-input-text] + +For certain third-party service integrations, when the service returns an error indicating that the request input was too large, the input will be truncated and the request is retried. These settings govern how the truncation is performed. + +`xpack.inference.truncator.reduction_percentage` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Specifies the percentage by which to reduce the input text if the third-party service responds with an error indicating that it is too long. Defaults to 50 percent (`0.5`).
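All of the settings above are dynamic, so they can be changed at runtime through the [cluster update settings API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings). The following request body is only a sketch with illustrative values, not recommended ones, showing how you might allow longer retries and a larger request queue:

```json
{
  "persistent": {
    "xpack.inference.http.retry.max_delay_bound": "10s",
    "xpack.inference.http.retry.timeout": "60s",
    "xpack.inference.http.request_executor.queue_capacity": 4000
  }
}
```

Settings omitted from the request keep their current values.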
+ + diff --git a/docs/reference/elasticsearch/configuration-reference/license-settings.md b/docs/reference/elasticsearch/configuration-reference/license-settings.md new file mode 100644 index 0000000000000..cc5caf247fd8d --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/license-settings.md @@ -0,0 +1,15 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/license-settings.html +--- + +# License settings [license-settings] + +You can configure this licensing setting in the `elasticsearch.yml` file. For more information, see [License management](docs-content://deploy-manage/license/manage-your-license-in-self-managed-cluster.md). + +`xpack.license.self_generated.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set to `basic` (default) to enable basic {{xpack}} features.
+ + If set to `trial`, the self-generated license gives access only to all the features of a x-pack for 30 days. You can later downgrade the cluster to a basic license if needed. + + diff --git a/docs/reference/elasticsearch/configuration-reference/local-gateway.md b/docs/reference/elasticsearch/configuration-reference/local-gateway.md new file mode 100644 index 0000000000000..e8bd29240b8e3 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/local-gateway.md @@ -0,0 +1,37 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-gateway.html +--- + +# Local gateway [modules-gateway] + +$$$dangling-indices$$$ +The local gateway stores the cluster state and shard data across full cluster restarts. + +The following *static* settings, which must be set on every [master-eligible node](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#master-node-role), control how long a freshly elected master should wait before it tries to recover the [cluster state](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-state) and the cluster’s data. + +::::{note} +These settings only take effect during a [full cluster restart](docs-content://deploy-manage/maintenance/start-stop-services/full-cluster-restart-rolling-restart-procedures.md#restart-cluster-full). +:::: + + +`gateway.expected_data_nodes` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Number of data nodes expected in the cluster. Recovery of local shards begins when the expected number of data nodes join the cluster. Defaults to `0`. + +`gateway.recover_after_time` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) If the expected number of nodes is not achieved, the recovery process waits for the configured amount of time before trying to recover. Defaults to `5m`. + + Once the `recover_after_time` duration has timed out, recovery will start as long as the following condition is met: + + +`gateway.recover_after_data_nodes` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Recover as long as this many data nodes have joined the cluster. + +These settings can be configured in `elasticsearch.yml` as follows: + +```yaml +gateway.expected_data_nodes: 3 +gateway.recover_after_time: 600s +gateway.recover_after_data_nodes: 3 +``` + diff --git a/docs/reference/elasticsearch/configuration-reference/machine-learning-settings.md b/docs/reference/elasticsearch/configuration-reference/machine-learning-settings.md new file mode 100644 index 0000000000000..1fb8b85491662 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/machine-learning-settings.md @@ -0,0 +1,142 @@ +--- +navigation_title: "Machine learning settings" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-settings.html +--- + +# Machine learning settings in Elasticsearch [ml-settings] + + +$$$ml-settings-description$$$ +You do not need to configure any settings to use {{ml}}. It is enabled by default. + +::::{important} +{{ml-cap}} uses SSE4.2 instructions on x86_64 machines, so it works only on x86_64 machines whose CPUs [support](https://en.wikipedia.org/wiki/SSE4#Supporting_CPUs) SSE4.2. (This limitation does not apply to aarch64 machines.) 
If you run {{es}} on older x86_64 hardware, you must disable {{ml}} (by setting `xpack.ml.enabled` to `false`). In this situation you should not attempt to use {{ml}} functionality in your cluster at all. +:::: + + +::::{tip} +To control memory usage used by {{ml}} jobs, you can use the [machine learning circuit breaker settings](/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md#circuit-breakers-page-model-inference). +:::: + + + +## General machine learning settings [general-ml-settings] + +`node.roles: [ ml ]` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set `node.roles` to contain `ml` to identify the node as a *{{ml}} node*. If you want to run {{ml}} jobs, there must be at least one {{ml}} node in your cluster. + + If you set `node.roles`, you must explicitly specify all the required roles for the node. To learn more, refer to [Node settings](/reference/elasticsearch/configuration-reference/node-settings.md). + + ::::{important} + * On dedicated coordinating nodes or dedicated master nodes, do not set the `ml` role. + * It is strongly recommended that dedicated {{ml}} nodes also have the `remote_cluster_client` role; otherwise, {{ccs}} fails when used in {{ml}} jobs or {{dfeeds}}. See [Remote-eligible node](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#remote-node). + + :::: + + +`xpack.ml.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The default value (`true`) enables {{ml}} APIs on the node. + + ::::{important} + If you want to use {{ml-features}} in your cluster, it is recommended that you use the default value for this setting on all nodes. + :::: + + + If set to `false`, the {{ml}} APIs are disabled on the node. For example, the node cannot open jobs, start {{dfeeds}}, receive transport (internal) communication requests, or requests from clients (including {{kib}}) related to {{ml}} APIs. If `xpack.ml.enabled` is not set uniformly across all nodes in your cluster then you are likely to experience problems with {{ml}} functionality not fully working. + + You must not use any {{ml}} functionality from ingest pipelines if `xpack.ml.enabled` is `false` on any node. Before setting `xpack.ml.enabled` to `false` on a node, consider whether you really meant to just exclude `ml` from the `node.roles`. Excluding `ml` from the [`node.roles`](/reference/elasticsearch/configuration-reference/node-settings.md#node-roles) will stop the node from running {{ml}} jobs and NLP models, but it will still be aware that {{ml}} functionality exists. Setting `xpack.ml.enabled` to `false` should be reserved for situations where you cannot use {{ml}} functionality at all in your cluster due to hardware limitations as described [above](#ml-settings-description). + + +`xpack.ml.inference_model.cache_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum inference cache size allowed. The inference cache exists in the JVM heap on each ingest node. The cache affords faster processing times for the `inference` processor. The value can be a static byte sized value (such as `2gb`) or a percentage of total allocated heap. Defaults to `40%`. See also [{{ml-cap}} circuit breaker](/reference/elasticsearch/configuration-reference/circuit-breaker-settings.md#circuit-breakers-page-model-inference). 
+ +$$$xpack-interference-model-ttl$$$ + +`xpack.ml.inference_model.time_to_live` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The time to live (TTL) for trained models in the inference model cache. The TTL is calculated from last access. Users of the cache (such as the inference processor or inference aggregator) cache a model on its first use and reset the TTL on every use. If a cached model is not accessed for the duration of the TTL, it is flagged for eviction from the cache. If a document is processed later, the model is again loaded into the cache. To update this setting in {{ess}}, see [Add {{es}} user settings](/reference/elasticsearch/configuration-reference/elastic-cloud-hosted-elasticsearch-settings.md). Defaults to `5m`. + +`xpack.ml.max_inference_processors` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The total number of `inference` type processors allowed across all ingest pipelines. Once the limit is reached, adding an `inference` processor to a pipeline is disallowed. Defaults to `50`. + +`xpack.ml.max_machine_memory_percent` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The maximum percentage of the machine’s memory that {{ml}} may use for running analytics processes. These processes are separate to the {{es}} JVM. The limit is based on the total memory of the machine, not current free memory. Jobs are not allocated to a node if doing so would cause the estimated memory use of {{ml}} jobs to exceed the limit. When the {{operator-feature}} is enabled, this setting can be updated only by operator users. The minimum value is `5`; the maximum value is `200`. Defaults to `30`. + + ::::{tip} + Do not configure this setting to a value higher than the amount of memory left over after running the {{es}} JVM unless you have enough swap space to accommodate it and have determined this is an appropriate configuration for a specialist use case. The maximum setting value is for the special case where it has been determined that using swap space for {{ml}} jobs is acceptable. The general best practice is to not use swap on {{es}} nodes. + :::: + + +`xpack.ml.max_model_memory_limit` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The maximum `model_memory_limit` property value that can be set for any {{ml}} jobs in this cluster. If you try to create a job with a `model_memory_limit` property value that is greater than this setting value, an error occurs. Existing jobs are not affected when you update this setting. If this setting is `0` or unset, there is no maximum `model_memory_limit` value. If there are no nodes that meet the memory requirements for a job, this lack of a maximum memory limit means it’s possible to create jobs that cannot be assigned to any available nodes. For more information about the `model_memory_limit` property, see [Create {{anomaly-jobs}}](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-put-job) or [Create {{dfanalytics-jobs}}](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-put-data-frame-analytics). Defaults to `0` if `xpack.ml.use_auto_machine_memory_percent` is `false`. 
If `xpack.ml.use_auto_machine_memory_percent` is `true` and `xpack.ml.max_model_memory_limit` is not explicitly set then it will default to the largest `model_memory_limit` that could be assigned in the cluster. + +$$$xpack.ml.max_open_jobs$$$ + +`xpack.ml.max_open_jobs` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The maximum number of jobs that can run simultaneously on a node. In this context, jobs include both {{anomaly-jobs}} and {{dfanalytics-jobs}}. The maximum number of jobs is also constrained by memory usage. Thus if the estimated memory usage of the jobs would be higher than allowed, fewer jobs will run on a node. Prior to version 7.1, this setting was a per-node non-dynamic setting. It became a cluster-wide dynamic setting in version 7.1. As a result, changes to its value after node startup are used only after every node in the cluster is running version 7.1 or higher. The minimum value is `1`; the maximum value is `512`. Defaults to `512`. + +`xpack.ml.nightly_maintenance_requests_per_second` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The rate at which the nightly maintenance task deletes expired model snapshots and results. The setting is a proxy to the [`requests_per_second`](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-delete-by-query) parameter used in the delete by query requests and controls throttling. When the {{operator-feature}} is enabled, this setting can be updated only by operator users. Valid values must be greater than `0.0` or equal to `-1.0`, where `-1.0` means a default value is used. Defaults to `-1.0` + +`xpack.ml.node_concurrent_job_allocations` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The maximum number of jobs that can concurrently be in the `opening` state on each node. Typically, jobs spend a small amount of time in this state before they move to `open` state. Jobs that must restore large models when they are opening spend more time in the `opening` state. When the {{operator-feature}} is enabled, this setting can be updated only by operator users. Defaults to `2`. + + +## Advanced machine learning settings [advanced-ml-settings] + +These settings are for advanced use cases; the default values are generally sufficient: + +`xpack.ml.enable_config_migration` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Reserved. When the {{operator-feature}} is enabled, this setting can be updated only by operator users. + +`xpack.ml.max_anomaly_records` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The maximum number of records that are output per bucket. Defaults to `500`. + +`xpack.ml.max_lazy_ml_nodes` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The number of lazily spun up {{ml}} nodes. Useful in situations where {{ml}} nodes are not desired until the first {{ml}} job opens. If the current number of {{ml}} nodes is greater than or equal to this setting, it is assumed that there are no more lazy nodes available as the desired number of nodes have already been provisioned. 
If a job is opened and this setting has a value greater than zero and there are no nodes that can accept the job, the job stays in the `OPENING` state until a new {{ml}} node is added to the cluster and the job is assigned to run on that node. When the {{operator-feature}} is enabled, this setting can be updated only by operator users. Defaults to `0`. + + ::::{important} + This setting assumes some external process is capable of adding {{ml}} nodes to the cluster. This setting is only useful when used in conjunction with such an external process. + :::: + + +`xpack.ml.max_ml_node_size` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The maximum node size for {{ml}} nodes in a deployment that supports automatic cluster scaling. If you set it to the maximum possible size of future {{ml}} nodes, when a {{ml}} job is assigned to a lazy node it can check (and fail quickly) when scaling cannot support the size of the job. When the {{operator-feature}} is enabled, this setting can be updated only by operator users. Defaults to `0b`, which means it will be assumed that automatic cluster scaling can add arbitrarily large nodes to the cluster. + +$$$xpack.ml.model_repository$$$ + +`xpack.ml.model_repository` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The location of the {{ml}} model repository where the model artifact files are available in case of a model installation in a restricted or closed network. `xpack.ml.model_repository` can be a string of a file location or an HTTP/HTTPS server. Example values are: + + ``` + xpack.ml.model_repository: file://${path.home}/config/models/ + ``` + + or + + ``` + xpack.ml.model_repository: https://my-custom-backend + ``` + + If `xpack.ml.model_repository` is a file location, it must point to a subdirectory of the `config` directory of {{es}}. + + +`xpack.ml.persist_results_max_retries` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The maximum number of times to retry bulk indexing requests that fail while processing {{ml}} results. If the limit is reached, the {{ml}} job stops processing data and its status is `failed`. When the {{operator-feature}} is enabled, this setting can be updated only by operator users. The minimum value is `0`; the maximum value is `50`. Defaults to `20`. + +`xpack.ml.process_connect_timeout` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The connection timeout for {{ml}} processes that run separately from the {{es}} JVM. When such processes are started they must connect to the {{es}} JVM. If the process does not connect within the time period specified by this setting then the process is assumed to have failed. When the {{operator-feature}} is enabled, this setting can be updated only by operator users. The minimum value is `5s`. Defaults to `10s`. + +`xpack.ml.use_auto_machine_memory_percent` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) If this setting is `true`, the `xpack.ml.max_machine_memory_percent` setting is ignored. Instead, the maximum percentage of the machine’s memory that can be used for running {{ml}} analytics processes is calculated automatically and takes into account the total node size and the size of the JVM on the node. 
When the {{operator-feature}} is enabled, this setting can be updated only by operator users. The default value is `false`. + + ::::{important} + * If you do not have dedicated {{ml}} nodes (that is to say, the node has multiple roles), do not enable this setting. Its calculations assume that {{ml}} analytics are the main purpose of the node. + * The calculation assumes that dedicated {{ml}} nodes have at least `256MB` memory reserved outside of the JVM. If you have tiny {{ml}} nodes in your cluster, you shouldn’t use this setting. + + :::: + + + If this setting is `true` it also affects the default value for `xpack.ml.max_model_memory_limit`. In this case `xpack.ml.max_model_memory_limit` defaults to the largest size that could be assigned in the current cluster. + + diff --git a/docs/reference/elasticsearch/configuration-reference/miscellaneous-cluster-settings.md b/docs/reference/elasticsearch/configuration-reference/miscellaneous-cluster-settings.md new file mode 100644 index 0000000000000..7b3ff23563c8d --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/miscellaneous-cluster-settings.md @@ -0,0 +1,160 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/misc-cluster-settings.html +--- + +# Miscellaneous cluster settings [misc-cluster-settings] + + +## Cluster name setting [cluster-name] + +A node can only join a cluster when it shares its `cluster.name` with all the other nodes in the cluster. The default name is `elasticsearch`, but you should change it to an appropriate name that describes the purpose of the cluster. + +```yaml +cluster.name: logging-prod +``` + +::::{important} +Do not reuse the same cluster names in different environments. Otherwise, nodes might join the wrong cluster. +:::: + + +::::{note} +Changing the name of a cluster requires a [full cluster restart](docs-content://deploy-manage/maintenance/start-stop-services/full-cluster-restart-rolling-restart-procedures.md#restart-cluster-full). +:::: + + + +## Metadata [cluster-read-only] + +An entire cluster may be set to read-only with the following setting: + +`cluster.blocks.read_only` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Make the whole cluster read only (indices do not accept write operations), metadata is not allowed to be modified (create or delete indices). Defaults to `false`. + +`cluster.blocks.read_only_allow_delete` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Identical to `cluster.blocks.read_only` but allows to delete indices to free up resources. Defaults to `false`. + +::::{warning} +Don’t rely on this setting to prevent changes to your cluster. Any user with access to the [cluster-update-settings](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings) API can make the cluster read-write again. +:::: + + + +## Cluster shard limits [cluster-shard-limit] + +There is a limit on the number of shards in a cluster, based on the number of nodes in the cluster. This is intended to prevent a runaway process from creating too many shards which can harm performance and in extreme cases may destabilize your cluster. + +::::{important} +These limits are intended as a safety net to protect against runaway shard creation and are not a sizing recommendation. 
The exact number of shards your cluster can safely support depends on your hardware configuration and workload, and may be smaller than the default limits. + +We do not recommend increasing these limits beyond the defaults. Clusters with more shards may appear to run well in normal operation, but may take a very long time to recover from temporary disruptions such as a network partition or an unexpected node restart, and may encounter problems when performing maintenance activities such as a rolling restart or upgrade. + +:::: + + +If an operation, such as creating a new index, restoring a snapshot of an index, or opening a closed index would lead to the number of shards in the cluster going over this limit, the operation will fail with an error indicating the shard limit. To resolve this, either scale out your cluster by adding nodes, or [delete some indices](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-delete) to bring the number of shards below the limit. + +If a cluster is already over the limit, perhaps due to changes in node membership or setting changes, all operations that create or open indices will fail. + +The cluster shard limit defaults to 1000 shards per non-frozen data node for normal (non-frozen) indices and 3000 shards per frozen data node for frozen indices. Both primary and replica shards of all open indices count toward the limit, including unassigned shards. For example, an open index with 5 primary shards and 2 replicas counts as 15 shards. Closed indices do not contribute to the shard count. + +You can dynamically adjust the cluster shard limit with the following setting: + +$$$cluster-max-shards-per-node$$$ + +`cluster.max_shards_per_node` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Limits the total number of primary and replica shards for the cluster. {{es}} calculates the limit as follows: + +`cluster.max_shards_per_node * number of non-frozen data nodes` + +Shards for closed indices do not count toward this limit. Defaults to `1000`. A cluster with no data nodes is unlimited. + +{{es}} rejects any request that creates more shards than this limit allows. For example, a cluster with a `cluster.max_shards_per_node` setting of `100` and three data nodes has a shard limit of 300. If the cluster already contains 296 shards, {{es}} rejects any request that adds five or more shards to the cluster. + +Note that if `cluster.max_shards_per_node` is set to a higher value than the default, the limits for [mmap count](docs-content://deploy-manage/deploy/self-managed/vm-max-map-count.md) and [open file descriptors](docs-content://deploy-manage/deploy/self-managed/file-descriptors.md) might also require adjustment. + +Notice that frozen shards have their own independent limit. + + +$$$cluster-max-shards-per-node-frozen$$$ + +`cluster.max_shards_per_node.frozen` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Limits the total number of primary and replica frozen shards for the cluster. {{es}} calculates the limit as follows: + +`cluster.max_shards_per_node.frozen * number of frozen data nodes` + +Shards for closed indices do not count toward this limit. Defaults to `3000`. A cluster with no frozen data nodes is unlimited. + +{{es}} rejects any request that creates more frozen shards than this limit allows. 
For example, a cluster with a `cluster.max_shards_per_node.frozen` setting of `100` and three frozen data nodes has a frozen shard limit of 300. If the cluster already contains 296 shards, {{es}} rejects any request that adds five or more frozen shards to the cluster. + + +::::{note} +These limits only apply to actions which create shards and do not limit the number of shards assigned to each node. To limit the number of shards assigned to each node, use the [`cluster.routing.allocation.total_shards_per_node`](/reference/elasticsearch/index-settings/total-shards-per-node.md#cluster-total-shards-per-node) setting. +:::: + + + +## User-defined cluster metadata [user-defined-data] + +User-defined metadata can be stored and retrieved using the Cluster Settings API. This can be used to store arbitrary, infrequently-changing data about the cluster without the need to create an index to store it. This data may be stored using any key prefixed with `cluster.metadata.`. For example, to store the email address of the administrator of a cluster under the key `cluster.metadata.administrator`, issue this request: + +```console +PUT /_cluster/settings +{ + "persistent": { + "cluster.metadata.administrator": "sysadmin@example.com" + } +} +``` + +::::{important} +User-defined cluster metadata is not intended to store sensitive or confidential information. Any information stored in user-defined cluster metadata will be viewable by anyone with access to the [Cluster Get Settings](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-get-settings) API, and is recorded in the {{es}} logs. +:::: + + + +## Index tombstones [cluster-max-tombstones] + +The cluster state maintains index tombstones to explicitly denote indices that have been deleted. The number of tombstones maintained in the cluster state is controlled by the following setting: + +`cluster.indices.tombstones.size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Index tombstones prevent nodes that are not part of the cluster when a delete occurs from joining the cluster and reimporting the index as though the delete was never issued. To keep the cluster state from growing huge we only keep the last `cluster.indices.tombstones.size` deletes, which defaults to 500. You can increase it if you expect nodes to be absent from the cluster and miss more than 500 deletes. We think that is rare, thus the default. Tombstones don’t take up much space, but we also think that a number like 50,000 is probably too big. + +If {{es}} encounters index data that is absent from the current cluster state, those indices are considered to be dangling. For example, this can happen if you delete more than `cluster.indices.tombstones.size` indices while an {{es}} node is offline. + +You can use the [Dangling indices API](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-indices) to manage this situation. + + +## Logger [cluster-logger] + +The settings which control logging can be updated [dynamically](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting) with the `logger.` prefix. 
For instance, to increase the logging level of the `indices.recovery` module to `DEBUG`, issue this request: + +```console +PUT /_cluster/settings +{ + "persistent": { + "logger.org.elasticsearch.indices.recovery": "DEBUG" + } +} +``` + + +## Persistent tasks allocation [persistent-tasks-allocation] + +Plugins can create a kind of tasks called persistent tasks. Those tasks are usually long-lived tasks and are stored in the cluster state, allowing the tasks to be revived after a full cluster restart. + +Every time a persistent task is created, the master node takes care of assigning the task to a node of the cluster, and the assigned node will then pick up the task and execute it locally. The process of assigning persistent tasks to nodes is controlled by the following settings: + +`cluster.persistent_tasks.allocation.enable` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Enable or disable allocation for persistent tasks: + +* `all` - (default) Allows persistent tasks to be assigned to nodes +* `none` - No allocations are allowed for any type of persistent task + +This setting does not affect the persistent tasks that are already being executed. Only newly created persistent tasks, or tasks that must be reassigned (after a node left the cluster, for example), are impacted by this setting. + + +`cluster.persistent_tasks.allocation.recheck_interval` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The master node will automatically check whether persistent tasks need to be assigned when the cluster state changes significantly. However, there may be other factors, such as memory usage, that affect whether persistent tasks can be assigned to nodes but do not cause the cluster state to change. This setting controls how often assignment checks are performed to react to these factors. The default is 30 seconds. The minimum permitted value is 10 seconds. + diff --git a/docs/reference/elasticsearch/configuration-reference/monitoring-settings.md b/docs/reference/elasticsearch/configuration-reference/monitoring-settings.md new file mode 100644 index 0000000000000..2d85a63ba83e4 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/monitoring-settings.md @@ -0,0 +1,368 @@ +--- +navigation_title: "Monitoring settings" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/monitoring-settings.html +--- + +# Monitoring settings in {{es}} [monitoring-settings] + + +::::{admonition} Deprecated in 7.16. +:class: warning + +Using the {{es}} Monitoring plugin to collect and ship monitoring data is deprecated. {{agent}} and {{metricbeat}} are the recommended methods for collecting and shipping monitoring data to a monitoring cluster. If you previously configured legacy collection methods, you should migrate to using [{{agent}}](docs-content://deploy-manage/monitor/stack-monitoring/collecting-monitoring-data-with-elastic-agent.md) or [{{metricbeat}}](docs-content://deploy-manage/monitor/stack-monitoring/collecting-monitoring-data-with-metricbeat.md) collection methods. +:::: + + +By default, {{es}} {{monitor-features}} are enabled but data collection is disabled. To enable data collection, use the `xpack.monitoring.collection.enabled` setting. 
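+
+For example, because `xpack.monitoring.collection.enabled` is a dynamic setting, one way to turn collection on for a live cluster is through the cluster settings API. This is a minimal sketch; adjust it to your own deployment:
+
+```console
+PUT /_cluster/settings
+{
+  "persistent": {
+    "xpack.monitoring.collection.enabled": true
+  }
+}
+```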
+ +Except where noted otherwise, these settings can be dynamically updated on a live cluster with the [cluster-update-settings](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings) API. + +To adjust how monitoring data is displayed in the monitoring UI, configure [`xpack.monitoring` settings](kibana://docs/reference/configuration-reference/monitoring-settings.md) in `kibana.yml`. To control how monitoring data is collected from {{ls}}, configure monitoring settings in `logstash.yml`. + +For more information, see [Monitor a cluster](docs-content://deploy-manage/monitor.md). + + +### General monitoring settings [general-monitoring-settings] + +`xpack.monitoring.enabled` +: [7.8.0] ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) This deprecated setting has no effect. + + +### Monitoring collection settings [monitoring-collection-settings] + +$$$monitoring-settings-description$$$ +The `xpack.monitoring.collection` settings control how data is collected from your {{es}} nodes. + +`xpack.monitoring.collection.enabled` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) [7.16.0] Set to `true` to enable the collection of monitoring data. When this setting is `false` (default), {{es}} monitoring data is not collected and all monitoring data from other sources such as {{kib}}, Beats, and {{ls}} is ignored. + +$$$xpack-monitoring-collection-interval$$$ + +`xpack.monitoring.collection.interval` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: [6.3.0] ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) Setting to `-1` to disable data collection is no longer supported beginning with 7.0.0. + + Controls how often data samples are collected. Defaults to `10s`. If you modify the collection interval, set the `xpack.monitoring.min_interval_seconds` option in `kibana.yml` to the same value. + + +`xpack.monitoring.elasticsearch.collection.enabled` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) [7.16.0] Controls whether statistics about your {{es}} cluster should be collected. Defaults to `true`. This is different from `xpack.monitoring.collection.enabled`, which allows you to enable or disable all monitoring collection. However, this setting simply disables the collection of {{es}} data while still allowing other data (e.g., {{kib}}, {{ls}}, Beats, or APM Server monitoring data) to pass through this cluster. + +`xpack.monitoring.collection.cluster.stats.timeout` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) [7.16.0] Timeout for collecting the cluster statistics, in [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). Defaults to `10s`. + +`xpack.monitoring.collection.node.stats.timeout` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) [7.16.0] Timeout for collecting the node statistics, in [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). Defaults to `10s`. + +`xpack.monitoring.collection.indices` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) [7.16.0] Controls which indices the {{monitor-features}} collect data from. Defaults to all indices. 
Specify the index names as a comma-separated list, for example `test1,test2,test3`. Names can include wildcards, for example `test*`. You can explicitly exclude indices by prepending `-`. For example `test*,-test3` will monitor all indices that start with `test` except for `test3`. System indices like `.security*` or `.kibana*` always start with a `.` and generally should be monitored. Consider adding `.*` to the list of indices to ensure monitoring of system indices. For example: `.*,test*,-test3`
+
+`xpack.monitoring.collection.index.stats.timeout`
+: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) [7.16.0] Timeout for collecting index statistics, in [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). Defaults to `10s`.
+
+`xpack.monitoring.collection.index.recovery.active_only`
+: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) [7.16.0] Controls whether or not all recoveries are collected. Set to `true` to collect only active recoveries. Defaults to `false`.
+
+`xpack.monitoring.collection.index.recovery.timeout`
+: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) [7.16.0] Timeout for collecting the recovery information, in [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). Defaults to `10s`.
+
+$$$xpack-monitoring-history-duration$$$
+
+`xpack.monitoring.history.duration` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) [7.16.0] Retention duration beyond which the indices created by a monitoring exporter are automatically deleted, in [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). Defaults to `7d` (7 days).
+
+    This setting has a minimum value of `1d` (1 day) to ensure that something is being monitored and it cannot be disabled.
+
+    ::::{important}
+    This setting currently impacts only `local`-type exporters. Indices created using the `http` exporter are not deleted automatically.
+    ::::
+
+
+`xpack.monitoring.exporters`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Configures where the agent stores monitoring data. By default, the agent uses a local exporter that indexes monitoring data on the cluster where it is installed. Use an HTTP exporter to send data to a separate monitoring cluster. For more information, see [Local exporter settings](#local-exporter-settings), [HTTP exporter settings](#http-exporter-settings), and [How it works](docs-content://deploy-manage/monitor/stack-monitoring.md).
+
+
+### Local exporter settings [local-exporter-settings]
+
+The `local` exporter is the default exporter used by {{monitor-features}}. As the name is meant to imply, it exports data to the *local* cluster, which means that there is not much to configure.
+
+If you do not supply *any* exporters, then the {{monitor-features}} automatically create one for you. If any exporter is provided, then no default is added.
+
+```yaml
+xpack.monitoring.exporters.my_local:
+  type: local
+```
+
+`type`
+: [7.16.0] The value for a Local exporter must always be `local` and it is required.
+
+`use_ingest`
+: Whether to supply a placeholder pipeline to the cluster and a pipeline processor with every bulk request. The default value is `true`. If disabled, then it means that it will not use pipelines, which means that a future release cannot automatically upgrade bulk requests to future-proof them.
+
+`cluster_alerts.management.enabled`
+: [7.16.0] Whether to create cluster alerts for this cluster. The default value is `true`. To use this feature, {{watcher}} must be enabled. If you have a basic license, cluster alerts are not displayed.
+
+`wait_master.timeout`
+: [7.16.0] Time to wait for the master node to set up the `local` exporter for monitoring, in [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). After that wait period, the non-master nodes warn the user about a possible missing configuration. Defaults to `30s`.
+
+
+### HTTP exporter settings [http-exporter-settings]
+
+The following lists the settings that can be supplied with the `http` exporter. All settings are shown relative to the name you select for your exporter:
+
+```yaml
+xpack.monitoring.exporters.my_remote:
+  type: http
+  host: ["host:port", ...]
+```
+
+`type`
+: [7.16.0] The value for an HTTP exporter must always be `http` and it is required.
+
+`host`
+: [7.16.0] Host supports multiple formats, either as an array or as a single value. Supported formats include `hostname`, `hostname:port`, `http://hostname`, `http://hostname:port`, `https://hostname`, and `https://hostname:port`. Hosts cannot be assumed. The default scheme is always `http` and the default port is always `9200` if not supplied as part of the `host` string.
+
+    ```yaml
+    xpack.monitoring.exporters:
+      example1:
+        type: http
+        host: "10.1.2.3"
+      example2:
+        type: http
+        host: ["http://10.1.2.4"]
+      example3:
+        type: http
+        host: ["10.1.2.5", "10.1.2.6"]
+      example4:
+        type: http
+        host: ["https://10.1.2.3:9200"]
+    ```
+
+
+`auth.username`
+: [7.16.0] The username is required if `auth.secure_password` is supplied.
+
+`auth.secure_password`
+: ([Secure](docs-content://deploy-manage/security/secure-settings.md), [reloadable](docs-content://deploy-manage/security/secure-settings.md#reloadable-secure-settings)) [7.16.0] The password for the `auth.username`.
+
+`connection.timeout`
+: [7.16.0] Amount of time that the HTTP connection is supposed to wait for a socket to open for the request, in [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). The default value is `6s`.
+
+`connection.read_timeout`
+: [7.16.0] Amount of time that the HTTP connection is supposed to wait for a socket to send back a response, in [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). The default value is `10 * connection.timeout` (`60s` if neither are set).
+
+`ssl`
+: [7.16.0] Each HTTP exporter can define its own TLS / SSL settings or inherit them. See [{{monitoring}} TLS/SSL settings](#ssl-monitoring-settings).
+
+`proxy.base_path`
+: [7.16.0] The base path to prefix any outgoing request, such as `/base/path` (e.g., bulk requests would then be sent as `/base/path/_bulk`). There is no default value.
+
+`headers`
+: [7.16.0] Optional headers that are added to every request, which can assist with routing requests through proxies.
+
+    ```yaml
+    xpack.monitoring.exporters.my_remote:
+      headers:
+        X-My-Array: [abc, def, xyz]
+        X-My-Header: abc123
+    ```
+
+    Array-based headers are sent `n` times where `n` is the size of the array. `Content-Type` and `Content-Length` cannot be set. Any headers created by the monitoring agent will override anything defined here.
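+
+As a hedged sketch that combines the options above, an exporter that authenticates to a separate monitoring cluster might look like the following. The exporter name, host, and username are placeholders, and the matching `auth.secure_password` must be added to the {{es}} keystore rather than to `elasticsearch.yml`:
+
+```yaml
+xpack.monitoring.exporters.my_remote:
+  type: http
+  host: ["https://10.1.2.3:9200"]
+  auth.username: remote_monitoring_user
+```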
+ + +`index.name.time_format` +: [7.16.0] A mechanism for changing the default date suffix for daily monitoring indices. The default format is `yyyy.MM.dd`. For example, `.monitoring-es-7-2021.08.26`. + +`use_ingest` +: Whether to supply a placeholder pipeline to the monitoring cluster and a pipeline processor with every bulk request. The default value is `true`. If disabled, then it means that it will not use pipelines, which means that a future release cannot automatically upgrade bulk requests to future-proof them. + +`cluster_alerts.management.enabled` +: [7.16.0] Whether to create cluster alerts for this cluster. The default value is `true`. To use this feature, {{watcher}} must be enabled. If you have a basic license, cluster alerts are not displayed. + +`cluster_alerts.management.blacklist` +: [7.16.0] Prevents the creation of specific cluster alerts. It also removes any applicable watches that already exist in the current cluster. + + You can add any of the following watch identifiers to the list of blocked alerts: + + * `elasticsearch_cluster_status` + * `elasticsearch_version_mismatch` + * `elasticsearch_nodes` + * `kibana_version_mismatch` + * `logstash_version_mismatch` + * `xpack_license_expiration` + + For example: `["elasticsearch_version_mismatch","xpack_license_expiration"]`. + + +## {{monitoring}} TLS/SSL settings [ssl-monitoring-settings] + +You can configure the following TLS/SSL settings. + +`xpack.monitoring.exporters.$NAME.ssl.supported_protocols` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] Supported protocols with versions. Valid protocols: `SSLv2Hello`, `SSLv3`, `TLSv1`, `TLSv1.1`, `TLSv1.2`, `TLSv1.3`. If the JVM’s SSL provider supports TLSv1.3, the default is `TLSv1.3,TLSv1.2,TLSv1.1`. Otherwise, the default is `TLSv1.2,TLSv1.1`. + + {{es}} relies on your JDK’s implementation of SSL and TLS. View [Supported SSL/TLS versions by JDK version](docs-content://deploy-manage/security/supported-ssltls-versions-by-jdk-version.md) for more information. + + ::::{note} + If `xpack.security.fips_mode.enabled` is `true`, you cannot use `SSLv2Hello` or `SSLv3`. See [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md). + :::: + + +`xpack.monitoring.exporters.$NAME.ssl.verification_mode` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] Controls the verification of certificates. + + ::::{dropdown} Valid values + `full` + : Validates that the provided certificate: has an issue date that’s within the `not_before` and `not_after` dates; chains to a trusted Certificate Authority (CA); has a `hostname` or IP address that matches the names within the certificate. + + `certificate` + : Validates the provided certificate and verifies that it’s signed by a trusted authority (CA), but doesn’t check the certificate `hostname`. + + `none` + : Performs no certificate validation. + + ::::{important} + Setting certificate validation to `none` disables many security benefits of SSL/TLS, which is very dangerous. Only set this value if instructed by Elastic Support as a temporary diagnostic mechanism when attempting to resolve TLS errors. + :::: + + + :::: + + + Defaults to `full`. + + +`xpack.monitoring.exporters.$NAME.ssl.cipher_suites` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] Supported cipher suites vary depending on which version of Java you use. 
For example, for version 12 the default value is `TLS_AES_256_GCM_SHA384`, `TLS_AES_128_GCM_SHA256`, `TLS_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA`, `TLS_RSA_WITH_AES_256_GCM_SHA384`, `TLS_RSA_WITH_AES_128_GCM_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA256`, `TLS_RSA_WITH_AES_128_CBC_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA`, `TLS_RSA_WITH_AES_128_CBC_SHA`. + + For more information, see Oracle’s [Java Cryptography Architecture documentation](https://docs.oracle.com/en/java/javase/11/security/oracle-providers.md#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2). + + +### {{monitoring}} TLS/SSL key and trusted certificate settings [monitoring-tls-ssl-key-trusted-certificate-settings] + +The following settings are used to specify a private key, certificate, and the trusted certificates that should be used when communicating over an SSL/TLS connection. A private key and certificate are optional and would be used if the server requires client authentication for PKI authentication. + + +### PEM encoded files [_pem_encoded_files] + +When using PEM encoded files, use the following settings: + +`xpack.monitoring.exporters.$NAME.ssl.key` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] Path to a PEM encoded file containing the private key. + + If HTTP client authentication is required, it uses this file. You cannot use this setting and `ssl.keystore.path` at the same time. + + +`xpack.monitoring.exporters.$NAME.ssl.key_passphrase` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. [7.17.0] Prefer `ssl.secure_key_passphrase` instead. + + You cannot use this setting and `ssl.secure_key_passphrase` at the same time. + + +`xpack.monitoring.exporters.$NAME.ssl.secure_key_passphrase` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) [7.16.0] The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. + +`xpack.monitoring.exporters.$NAME.ssl.certificate` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] Specifies the path for the PEM encoded certificate (or certificate chain) that is associated with the key. + + This setting can be used only if `ssl.key` is set. + + +`xpack.monitoring.exporters.$NAME.ssl.certificate_authorities` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] List of paths to PEM encoded certificate files that should be trusted. + + This setting and `ssl.truststore.path` cannot be used at the same time. 
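+
+As an illustrative sketch only (the exporter name and file paths are placeholders), an `http` exporter that trusts a PEM-encoded CA and presents a client certificate would combine these settings along the following lines:
+
+```yaml
+xpack.monitoring.exporters.my_remote:
+  type: http
+  host: ["https://monitoring.example.com:9200"]
+  ssl:
+    certificate_authorities: ["/path/to/ca.crt"]
+    certificate: /path/to/client.crt
+    key: /path/to/client.key
+```
+
+If the key is encrypted, the passphrase would go into the `xpack.monitoring.exporters.my_remote.ssl.secure_key_passphrase` secure setting described above, not into `elasticsearch.yml`.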
+ + + +### Java keystore files [_java_keystore_files] + +When using Java keystore files (JKS), which contain the private key, certificate and certificates that should be trusted, use the following settings: + +`xpack.monitoring.exporters.$NAME.ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`xpack.monitoring.exporters.$NAME.ssl.keystore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the keystore. [7.17.0] Prefer `ssl.keystore.secure_password` instead. + +`xpack.monitoring.exporters.$NAME.ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) [7.16.0] The password for the keystore. + +`xpack.monitoring.exporters.$NAME.ssl.keystore.key_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the key in the keystore. The default is the keystore password. [7.17.0] Prefer `ssl.keystore.secure_key_password` instead. + + You cannot use this setting and `ssl.keystore.secure_password` at the same time. + + +`xpack.monitoring.exporters.$NAME.ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) [7.16.0] The password for the key in the keystore. The default is the keystore password. + +`xpack.monitoring.exporters.$NAME.ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`xpack.monitoring.exporters.$NAME.ssl.truststore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the truststore. [7.17.0] Prefer `ssl.truststore.secure_password` instead. + + You cannot use this setting and `ssl.truststore.secure_password` at the same time. + + +`xpack.monitoring.exporters.$NAME.ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) [7.16.0] Password for the truststore. + + +### PKCS#12 files [monitoring-pkcs12-files] + +{{es}} can be configured to use PKCS#12 container files (`.p12` or `.pfx` files) that contain the private key, certificate and certificates that should be trusted. + +PKCS#12 files are configured in the same way as Java keystore files: + +`xpack.monitoring.exporters.$NAME.ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`xpack.monitoring.exporters.$NAME.ssl.keystore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] The format of the keystore file. It must be either `jks` or `PKCS12`. 
If the keystore path ends in ".p12", ".pfx", or ".pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. + +`xpack.monitoring.exporters.$NAME.ssl.keystore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] The password for the keystore. [7.17.0] Prefer `ssl.keystore.secure_password` instead. + +`xpack.monitoring.exporters.$NAME.ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) [7.16.0] The password for the keystore. + +`xpack.monitoring.exporters.$NAME.ssl.keystore.key_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the key in the keystore. The default is the keystore password. [7.17.0] Prefer `ssl.keystore.secure_key_password` instead. + + You cannot use this setting and `ssl.keystore.secure_password` at the same time. + + +`xpack.monitoring.exporters.$NAME.ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) [7.16.0] The password for the key in the keystore. The default is the keystore password. + +`xpack.monitoring.exporters.$NAME.ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`xpack.monitoring.exporters.$NAME.ssl.truststore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] Set this to `PKCS12` to indicate that the truststore is a PKCS#12 file. + +`xpack.monitoring.exporters.$NAME.ssl.truststore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.16.0] The password for the truststore. [7.17.0] Prefer `ssl.truststore.secure_password` instead. + + You cannot use this setting and `ssl.truststore.secure_password` at the same time. + + +`xpack.monitoring.exporters.$NAME.ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) [7.16.0] Password for the truststore. + + + diff --git a/docs/reference/elasticsearch/configuration-reference/networking-settings.md b/docs/reference/elasticsearch/configuration-reference/networking-settings.md new file mode 100644 index 0000000000000..aab8d53f7f1fc --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/networking-settings.md @@ -0,0 +1,674 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-network.html +--- + +# Networking settings [modules-network] + +Each {{es}} node has two different network interfaces. Clients send requests to {{es}}'s REST APIs using its [HTTP interface](#http-settings), but nodes communicate with other nodes using the [transport interface](#transport-settings). The transport interface is also used for communication with [remote clusters](docs-content://deploy-manage/remote-clusters/remote-clusters-self-managed.md). The transport interface uses a custom binary protocol sent over [long-lived](#long-lived-connections) TCP channels. Both interfaces can be configured to use [TLS for security](docs-content://deploy-manage/security.md). 
+ +You can configure both of these interfaces at the same time using the `network.*` settings. If you have a more complicated network, you might need to configure the interfaces independently using the `http.*` and `transport.*` settings. Where possible, use the `network.*` settings that apply to both interfaces to simplify your configuration and reduce duplication. + +By default {{es}} binds only to `localhost` which means it cannot be accessed remotely. This configuration is sufficient for a local development cluster made of one or more nodes all running on the same host. To form a cluster across multiple hosts, or which is accessible to remote clients, you must adjust some [network settings](#common-network-settings) such as `network.host`. + +::::{admonition} Be careful with the network configuration! +:class: warning + +Never expose an unprotected node to the public internet. If you do, you are permitting anyone in the world to download, modify, or delete any of the data in your cluster. + +:::: + + +Configuring {{es}} to bind to a non-local address will [convert some warnings into fatal exceptions](docs-content://deploy-manage/deploy/self-managed/important-system-configuration.md#dev-vs-prod). If a node refuses to start after configuring its network settings then you must address the logged exceptions before proceeding. + +## Commonly used network settings [common-network-settings] + +Most users will need to configure only the following network settings. + +`network.host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) Sets the address of this node for both HTTP and transport traffic. The node will bind to this address and will also use it as its publish address. Accepts an IP address, a hostname, or a [special value](#network-interface-values). + + Defaults to `_local_`. However, note that [security auto-configuration](docs-content://deploy-manage/deploy/self-managed/installing-elasticsearch.md) will add `http.host: 0.0.0.0` to your `elasticsearch.yml` configuration file, which overrides this default for HTTP traffic. + + +`http.port` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) The port to bind for HTTP client communication. Accepts a single value or a range. If a range is specified, the node will bind to the first available port in the range. + + Defaults to `9200-9300`. + + +`transport.port` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) The port to bind for communication between nodes. Accepts a single value or a range. If a range is specified, the node will bind to the first available port in the range. Set this setting to a single port, not a range, on every master-eligible node. + + Defaults to `9300-9400`. + + +$$$remote_cluster.port$$$ + +`remote_cluster.port` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) The port to bind for remote cluster client communication. Accepts a single value. + + Defaults to `9443`. + + + +## Special values for network addresses [network-interface-values] + +You can configure {{es}} to automatically determine its addresses by using the following special values. Use these values when configuring `network.host`, `network.bind_host`, `network.publish_host`, and the corresponding settings for the HTTP and transport interfaces. 
+ +`_local_` +: Any loopback addresses on the system, for example `127.0.0.1`. + +`_site_` +: Any site-local addresses on the system, for example `192.168.0.1`. + +`_global_` +: Any globally-scoped addresses on the system, for example `8.8.8.8`. + +`_[networkInterface]_` +: Use the addresses of the network interface called `[networkInterface]`. For example if you wish to use the addresses of an interface called `en0` then set `network.host: _en0_`. + +`0.0.0.0` +: The addresses of all available network interfaces. + +::::{note} +In some systems these special values resolve to multiple addresses. If so, {{es}} will select one of them as its publish address and may change its selection on each node restart. Ensure your node is accessible at every possible address. +:::: + + +::::{note} +Any values containing a `:` (e.g. an IPv6 address or some of the [special values](#network-interface-values)) must be quoted because `:` is a special character in YAML. +:::: + + +### IPv4 vs IPv6 [network-interface-values-ipv4-vs-ipv6] + +These special values yield both IPv4 and IPv6 addresses by default, but you can also add an `:ipv4` or `:ipv6` suffix to limit them to just IPv4 or IPv6 addresses respectively. For example, `network.host: "_en0:ipv4_"` would set this node’s addresses to the IPv4 addresses of interface `en0`. + +::::{admonition} Discovery in the Cloud +:class: tip + +More special settings are available when running in the Cloud with either the [EC2 discovery plugin](/reference/elasticsearch-plugins/discovery-ec2.md) or the [Google Compute Engine discovery plugin](/reference/elasticsearch-plugins/discovery-gce-network-host.md) installed. + +:::: + + + + +## Binding and publishing [modules-network-binding-publishing] + +{{es}} uses network addresses for two distinct purposes known as binding and publishing. Most nodes will use the same address for everything, but more complicated setups may need to configure different addresses for different purposes. + +When an application such as {{es}} wishes to receive network communications, it must indicate to the operating system the address or addresses whose traffic it should receive. This is known as *binding* to those addresses. {{es}} can bind to more than one address if needed, but most nodes only bind to a single address. {{es}} can only bind to an address if it is running on a host that has a network interface with that address. If necessary, you can configure the transport and HTTP interfaces to bind to different addresses. + +Each {{es}} node has an address at which clients and other nodes can contact it, known as its *publish address*. Each node has one publish address for its HTTP interface and one for its transport interface. These two addresses can be anything, and don’t need to be addresses of the network interfaces on the host. The only requirements are that each node must be: + +* Accessible at its HTTP publish address by all clients that will discover it using sniffing. +* Accessible at its transport publish address by all other nodes in its cluster, and by any remote clusters that will discover it using [sniff mode](docs-content://deploy-manage/remote-clusters/remote-clusters-self-managed.md#sniff-mode). + +Each node must have its own distinct publish address. + +If you specify the transport publish address using a hostname then {{es}} will resolve this hostname to an IP address once during startup, and other nodes will use the resulting IP address instead of resolving the name again themselves. 
You must use a hostname such that all of the addresses to which it resolves are addresses at which the node is accessible from all other nodes. To avoid confusion, it is simplest to use a hostname which resolves to a single address. + +If you specify the transport publish address using a [special value](#network-interface-values) then {{es}} will resolve this value to a single IP address during startup, and other nodes will use the resulting IP address instead of resolving the value again themselves. You must use a value such that all of the addresses to which it resolves are addresses at which the node is accessible from all other nodes. To avoid confusion, it is simplest to use a value which resolves to a single address. It is usually a mistake to use `0.0.0.0` as a publish address on hosts with more than one network interface. + +### Using a single address [_using_a_single_address] + +The most common configuration is for {{es}} to bind to a single address at which it is accessible to clients and other nodes. To use this configuration, set only `network.host` to the desired address. Do not separately set any bind or publish addresses. Do not separately specify the addresses for the HTTP or transport interfaces. + + +### Using multiple addresses [_using_multiple_addresses] + +Use the [advanced network settings](#advanced-network-settings) if you wish to bind {{es}} to multiple addresses, or to publish a different address from the addresses to which you are binding. Set `network.bind_host` to the bind addresses, and `network.publish_host` to the address at which this node is exposed. In complex configurations, you can configure these addresses differently for the HTTP and transport interfaces. + + + +## Advanced network settings [advanced-network-settings] + +These advanced settings let you bind to multiple addresses, or to use different addresses for binding and publishing. They are not required in most cases and you should not use them if you can use the [commonly used settings](#common-network-settings) instead. + +`network.bind_host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) The network address(es) to which the node should bind in order to listen for incoming connections. Accepts a list of IP addresses, hostnames, and [special values](#network-interface-values). Defaults to the address given by `network.host`. Use this setting only if binding to multiple addresses or using different addresses for publishing and binding. + +`network.publish_host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) The network address that clients and other nodes can use to contact this node. Accepts an IP address, a hostname, or a [special value](#network-interface-values). Defaults to the address given by `network.host`. Use this setting only if binding to multiple addresses or using different addresses for publishing and binding. + +::::{note} +You can specify a list of addresses for `network.host` and `network.publish_host`. You can also specify one or more hostnames or [special values](#network-interface-values) that resolve to multiple addresses. If you do this then {{es}} chooses one of the addresses for its publish address. This choice uses heuristics based on IPv4/IPv6 stack preference and reachability and may change when the node restarts. Ensure each node is accessible at all possible publish addresses. 
+:::: + + +### Advanced TCP settings [tcp-settings] + +Use the following settings to control the low-level parameters of the TCP connections used by the HTTP and transport interfaces. + +`network.tcp.keep_alive` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures the `SO_KEEPALIVE` option for network sockets, which determines whether each connection sends TCP keepalive probes. Defaults to `true`. + +`network.tcp.keep_idle` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Configures the `TCP_KEEPIDLE` option for network sockets, which determines the time in seconds that a connection must be idle before starting to send TCP keepalive probes. Defaults to `-1`, which means to use the system default. This value cannot exceed `300` seconds. Only applicable on Linux and macOS. + +`network.tcp.keep_interval` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Configures the `TCP_KEEPINTVL` option for network sockets, which determines the time in seconds between sending TCP keepalive probes. Defaults to `-1`, which means to use the system default. This value cannot exceed `300` seconds. Only applicable on Linux and macOS. + +`network.tcp.keep_count` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Configures the `TCP_KEEPCNT` option for network sockets, which determines the number of unacknowledged TCP keepalive probes that may be sent on a connection before it is dropped. Defaults to `-1`, which means to use the system default. Only applicable on Linux and macOS. + +`network.tcp.no_delay` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures the `TCP_NODELAY` option on network sockets, which determines whether [TCP no delay](https://en.wikipedia.org/wiki/Nagle%27s_algorithm) is enabled. Defaults to `true`. + +`network.tcp.reuse_address` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures the `SO_REUSEADDR` option for network sockets, which determines whether the address can be reused or not. Defaults to `false` on Windows and `true` otherwise. + +`network.tcp.send_buffer_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units)) Configures the size of the TCP send buffer for network sockets. Defaults to `-1` which means to use the system default. + +`network.tcp.receive_buffer_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units)) Configures the size of the TCP receive buffer. Defaults to `-1` which means to use the system default. + + + +## Advanced HTTP settings [http-settings] + +Use the following advanced settings to configure the HTTP interface independently of the [transport interface](#transport-settings). You can also configure both interfaces together using the [network settings](#common-network-settings). 
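+
+For instance, a minimal sketch of a node that exposes its HTTP interface on a different address from its transport interface might look like this; the addresses are placeholders, not a recommendation:
+
+```yaml
+network.host: 10.0.0.5    # transport (node-to-node) traffic binds and publishes here
+http.host: 192.168.1.5    # HTTP (REST client) traffic binds and publishes here
+http.port: 9200
+```
+
+The individual HTTP settings are described below.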
+ +`http.host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) Sets the address of this node for HTTP traffic. The node will bind to this address and will also use it as its HTTP publish address. Accepts an IP address, a hostname, or a [special value](#network-interface-values). Use this setting only if you require different configurations for the transport and HTTP interfaces. + + Defaults to the address given by `network.host`. However, note that [security auto-configuration](docs-content://deploy-manage/deploy/self-managed/installing-elasticsearch.md) will add `http.host: 0.0.0.0` to your `elasticsearch.yml` configuration file, which overrides this default. + + +`http.bind_host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) The network address(es) to which the node should bind in order to listen for incoming HTTP connections. Accepts a list of IP addresses, hostnames, and [special values](#network-interface-values). Defaults to the address given by `http.host` or `network.bind_host`. Use this setting only if you require to bind to multiple addresses or to use different addresses for publishing and binding, and you also require different binding configurations for the transport and HTTP interfaces. + +`http.publish_host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) The network address for HTTP clients to contact the node using sniffing. Accepts an IP address, a hostname, or a [special value](#network-interface-values). Defaults to the address given by `http.host` or `network.publish_host`. Use this setting only if you require to bind to multiple addresses or to use different addresses for publishing and binding, and you also require different binding configurations for the transport and HTTP interfaces. + +`http.publish_port` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) The port of the [HTTP publish address](#modules-network-binding-publishing). Configure this setting only if you need the publish port to be different from `http.port`. Defaults to the port assigned via `http.port`. + +`http.max_content_length` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units)) Maximum size of an HTTP request body. If the body is compressed, the limit applies to the HTTP request body size before compression. Defaults to `100mb`. Configuring this setting to greater than `100mb` can cause cluster instability and is not recommended. If you hit this limit when sending a request to the [Bulk](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-bulk) API, configure your client to send fewer documents in each bulk request. If you wish to index individual documents that exceed `100mb`, pre-process them into smaller documents before sending them to {{es}}. For instance, store the raw data in a system outside {{es}} and include a link to the raw data in the documents that {{es}} indexes. + +`http.max_initial_line_length` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units)) Maximum size of an HTTP URL. Defaults to `4kb`. 
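+
+As a hedged illustration of the publish settings above (the host name and port are placeholders): a node that clients reach through a load balancer or NAT gateway might bind locally while advertising the externally visible coordinates:
+
+```yaml
+http.bind_host: 10.0.0.5              # address the node actually listens on
+http.publish_host: es-gw.example.com  # address advertised to HTTP clients
+http.publish_port: 443                # port exposed by the gateway
+```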
+ +`http.max_header_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units)) Maximum size of allowed headers. Defaults to `16kb`. + +$$$http-compression$$$ + +`http.compression` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Support for compression when possible (with Accept-Encoding). If HTTPS is enabled, defaults to `false`. Otherwise, defaults to `true`. + + Disabling compression for HTTPS mitigates potential security risks, such as a [BREACH attack](https://en.wikipedia.org/wiki/BREACH). To compress HTTPS traffic, you must explicitly set `http.compression` to `true`. + + +`http.compression_level` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Defines the compression level to use for HTTP responses. Valid values are in the range of 1 (minimum compression) and 9 (maximum compression). Defaults to `3`. + +$$$http-cors-enabled$$$ + +`http.cors.enabled` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Enable or disable cross-origin resource sharing, which determines whether a browser on another origin can execute requests against {{es}}. Set to `true` to enable {{es}} to process pre-flight [CORS](https://en.wikipedia.org/wiki/Cross-origin_resource_sharing) requests. {{es}} will respond to those requests with the `Access-Control-Allow-Origin` header if the `Origin` sent in the request is permitted by the `http.cors.allow-origin` list. Set to `false` (the default) to make {{es}} ignore the `Origin` request header, effectively disabling CORS requests because {{es}} will never respond with the `Access-Control-Allow-Origin` response header. + + ::::{note} + If the client does not send a pre-flight request with an `Origin` header or it does not check the response headers from the server to validate the `Access-Control-Allow-Origin` response header, then cross-origin security is compromised. If CORS is not enabled on {{es}}, the only way for the client to know is to send a pre-flight request and realize the required response headers are missing. + :::: + + +$$$http-cors-allow-origin$$$ + +`http.cors.allow-origin` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) Which origins to allow. If you prepend and append a forward slash (`/`) to the value, this will be treated as a regular expression, allowing you to support HTTP and HTTPs. For example, using `/https?:\/\/localhost(:[0-9]+)?/` would return the request header appropriately in both cases. Defaults to no origins allowed. + + ::::{important} + A wildcard (`*`) is a valid value but is considered a security risk, as your {{es}} instance is open to cross origin requests from **anywhere**. 
+ :::: + + +$$$http-cors-max-age$$$ + +`http.cors.max-age` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Browsers send a "preflight" OPTIONS-request to determine CORS settings. `max-age` defines for how long, in seconds, the result should be cached. Defaults to `1728000` (20 days). + +$$$http-cors-allow-methods$$$ + +`http.cors.allow-methods` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) Which methods to allow. Defaults to `OPTIONS, HEAD, GET, POST, PUT, DELETE`. + +$$$http-cors-allow-headers$$$ + +`http.cors.allow-headers` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) Which headers to allow. Defaults to `X-Requested-With, Content-Type, Content-Length, Authorization, Accept, User-Agent, X-Elastic-Client-Meta`. + +$$$http-cors-expose-headers$$$ + +`http.cors.expose-headers` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Which response headers to expose in the client. Defaults to `X-elastic-product`. + +$$$http-cors-allow-credentials$$$ + +`http.cors.allow-credentials` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Whether the `Access-Control-Allow-Credentials` header should be returned. Defaults to `false`. + + ::::{note} + This header is only returned when the setting is set to `true`. + :::: + + +`http.detailed_errors.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures whether detailed error reporting in HTTP responses is enabled. Defaults to `true`. When this option is set to `false`, only basic information is returned if an error occurs in the request, and requests with [`?error_trace` parameter](/reference/elasticsearch/rest-apis/common-options.md#common-options-error-options) set are rejected. + +`http.pipelining.max_events` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) The maximum number of events to be queued up in memory before an HTTP connection is closed, defaults to `10000`. + +`http.max_warning_header_count` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) The maximum number of warning headers in client HTTP responses. Defaults to `-1` which means the number of warning headers is unlimited. + +`http.max_warning_header_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units)) The maximum total size of warning headers in client HTTP responses. Defaults to `-1` which means the size of the warning headers is unlimited. 
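+
+For example, both warning-header limits could be capped in `elasticsearch.yml`. This is an illustrative sketch; the values are placeholders rather than recommendations:
+
+```yaml
+# Hypothetical values: bound the number and total size of warning headers.
+http.max_warning_header_count: 64
+http.max_warning_header_size: 16kb
+```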
+ +`http.tcp.keep_alive` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures the `SO_KEEPALIVE` option for this socket, which determines whether it sends TCP keepalive probes. Defaults to `network.tcp.keep_alive`. + +`http.tcp.keep_idle` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Configures the `TCP_KEEPIDLE` option for HTTP sockets, which determines the time in seconds that a connection must be idle before starting to send TCP keepalive probes. Defaults to `network.tcp.keep_idle`, which uses the system default. This value cannot exceed `300` seconds. Only applicable on Linux and macOS. + +`http.tcp.keep_interval` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Configures the `TCP_KEEPINTVL` option for HTTP sockets, which determines the time in seconds between sending TCP keepalive probes. Defaults to `network.tcp.keep_interval`, which uses the system default. This value cannot exceed `300` seconds. Only applicable on Linux and macOS. + +`http.tcp.keep_count` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Configures the `TCP_KEEPCNT` option for HTTP sockets, which determines the number of unacknowledged TCP keepalive probes that may be sent on a connection before it is dropped. Defaults to `network.tcp.keep_count`, which uses the system default. Only applicable on Linux and macOS. + +`http.tcp.no_delay` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures the `TCP_NODELAY` option on HTTP sockets, which determines whether [TCP no delay](https://en.wikipedia.org/wiki/Nagle%27s_algorithm) is enabled. Defaults to `true`. + +`http.tcp.reuse_address` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures the `SO_REUSEADDR` option for HTTP sockets, which determines whether the address can be reused or not. Defaults to `false` on Windows and `true` otherwise. + +`http.tcp.send_buffer_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units)) The size of the TCP send buffer for HTTP traffic. Defaults to `network.tcp.send_buffer_size`. + +`http.tcp.receive_buffer_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units)) The size of the TCP receive buffer for HTTP traffic. Defaults to `network.tcp.receive_buffer_size`. + +`http.client_stats.enabled` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), boolean) Enable or disable collection of HTTP client stats. Defaults to `true`. + +`http.client_stats.closed_channels.max_count` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) When `http.client_stats.enabled` is `true`, sets the maximum number of closed HTTP channels for which {{es}} reports statistics. Defaults to `10000`. 
+ +`http.client_stats.closed_channels.max_age` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [time value](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) When `http.client_stats.enabled` is `true`, sets the maximum length of time after closing a HTTP channel that {{es}} will report that channel’s statistics. Defaults to `5m`. + +### HTTP client configuration [_http_client_configuration] + +Many HTTP clients and proxies are configured for browser-like response latency and impose a fairly short timeout by default, reporting a failure if {{es}} takes longer than this timeout to complete the processing of a request. {{es}} will always eventually respond to every request, but some requests may require many minutes of processing time to complete. Consider carefully whether your client’s default response timeout is appropriate for your needs. In many cases it is better to wait longer for a response instead of failing, and this means you should disable any response timeouts: + +* If you react to a timeout by retrying the request, the retry will often end up being placed at the back of the same queue which held the original request. It will therefore take longer to complete the processing of the request if you time out and retry instead of waiting more patiently. Retrying also imposes additional load on {{es}}. +* If a request is not idempotent and cannot be retried then failing the request is your last resort. Waiting more patiently for a response will usually allow the overall operation to succeed. + +If you disable the response timeout in your client, make sure to configure TCP keepalives instead. TCP keepalives are the recommended way to prevent a client from waiting indefinitely in the event of a network outage. + + + +## Advanced transport settings [transport-settings] + +Use the following advanced settings to configure the transport interface independently of the [HTTP interface](#http-settings). Use the [network settings](#common-network-settings) to configure both interfaces together. + +`transport.host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) Sets the address of this node for transport traffic. The node will bind to this address and will also use it as its transport publish address. Accepts an IP address, a hostname, or a [special value](#network-interface-values). Use this setting only if you require different configurations for the transport and HTTP interfaces. + + Defaults to the address given by `network.host`. + + +`transport.bind_host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) The network address(es) to which the node should bind in order to listen for incoming transport connections. Accepts a list of IP addresses, hostnames, and [special values](#network-interface-values). Defaults to the address given by `transport.host` or `network.bind_host`. Use this setting only if you require to bind to multiple addresses or to use different addresses for publishing and binding, and you also require different binding configurations for the transport and HTTP interfaces. + +`transport.publish_host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) The network address at which the node can be contacted by other nodes. 
Accepts an IP address, a hostname, or a [special value](#network-interface-values). Defaults to the address given by `transport.host` or `network.publish_host`. Use this setting only if you require to bind to multiple addresses or to use different addresses for publishing and binding, and you also require different binding configurations for the transport and HTTP interfaces. + +`transport.publish_port` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) The port of the [transport publish address](#modules-network-binding-publishing). Set this parameter only if you need the publish port to be different from `transport.port`. Defaults to the port assigned via `transport.port`. + +`transport.connect_timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [time value](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) The connect timeout for initiating a new connection (in time setting format). Defaults to `30s`. + +$$$transport-settings-compress$$$ + +`transport.compress` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) Determines which transport requests are compressed before sending them to another node. {{es}} will compress transport responses if and only if the corresponding request was compressed. See also `transport.compression_scheme`, which specifies the compression scheme which is used. Accepts the following values: + + `false` + : No transport requests are compressed. This option uses the most network bandwidth, but avoids the CPU overhead of compression and decompression. + + `indexing_data` + : Compresses only the raw indexing data sent between nodes during ingest, CCR following (excluding bootstrapping) and operations-based shard recovery (excluding file-based recovery which copies the raw Lucene data). This option is a good trade-off between network bandwidth savings and the extra CPU required for compression and decompression. This option is the default. + + `true` + : All transport requests are compressed. This option may perform better than `indexing_data` in terms of network bandwidth, but will require the most CPU for compression and decompression work. + + +$$$transport-settings-compression-scheme$$$ + +`transport.compression_scheme` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) Configures the compression scheme for requests which are selected for compression by to the `transport.compress` setting. Accepts either `deflate` or `lz4`, which offer different trade-offs between compression ratio and CPU usage. {{es}} will use the same compression scheme for responses as for the corresponding requests. Defaults to `lz4`. + +`transport.tcp.keep_alive` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures the `SO_KEEPALIVE` option for transport sockets, which determines whether they send TCP keepalive probes. Defaults to `network.tcp.keep_alive`. + +`transport.tcp.keep_idle` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Configures the `TCP_KEEPIDLE` option for transport sockets, which determines the time in seconds that a connection must be idle before starting to send TCP keepalive probes. 
Defaults to `network.tcp.keep_idle` if set, or the system default otherwise. This value cannot exceed `300` seconds. In cases where the system default is higher than `300`, the value is automatically lowered to `300`. Only applicable on Linux and macOS. + +`transport.tcp.keep_interval` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Configures the `TCP_KEEPINTVL` option for transport sockets, which determines the time in seconds between sending TCP keepalive probes. Defaults to `network.tcp.keep_interval` if set, or the system default otherwise. This value cannot exceed `300` seconds. In cases where the system default is higher than `300`, the value is automatically lowered to `300`. Only applicable on Linux and macOS. + +`transport.tcp.keep_count` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Configures the `TCP_KEEPCNT` option for transport sockets, which determines the number of unacknowledged TCP keepalive probes that may be sent on a connection before it is dropped. Defaults to `network.tcp.keep_count` if set, or the system default otherwise. Only applicable on Linux and macOS. + +`transport.tcp.no_delay` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures the `TCP_NODELAY` option on transport sockets, which determines whether [TCP no delay](https://en.wikipedia.org/wiki/Nagle%27s_algorithm) is enabled. Defaults to `true`. + +`transport.tcp.reuse_address` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures the `SO_REUSEADDR` option for network sockets, which determines whether the address can be reused or not. Defaults to `network.tcp.reuse_address`. + +`transport.tcp.send_buffer_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units)) The size of the TCP send buffer for transport traffic. Defaults to `network.tcp.send_buffer_size`. + +`transport.tcp.receive_buffer_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units)) The size of the TCP receive buffer for transport traffic. Defaults to `network.tcp.receive_buffer_size`. + +`transport.ping_schedule` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [time value](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) Configures the time between sending application-level pings on all transport connections to promptly detect when a transport connection has failed. Defaults to `-1` meaning that application-level pings are not sent. You should use TCP keepalives (see `transport.tcp.keep_alive`) instead of application-level pings wherever possible. + +### Transport profiles [transport-profiles] + +Elasticsearch allows you to bind to multiple ports on different interfaces by the use of transport profiles. 
See this example configuration:
+
+```yaml
+transport.profiles.default.port: 9300-9400
+transport.profiles.default.bind_host: 10.0.0.1
+transport.profiles.client.port: 9500-9600
+transport.profiles.client.bind_host: 192.168.0.1
+transport.profiles.dmz.port: 9700-9800
+transport.profiles.dmz.bind_host: 172.16.1.2
+```
+
+The `default` profile is special. It is used as a fallback for any other profiles, if those do not have a specific configuration setting set, and is how this node connects to other nodes in the cluster. Other profiles can have any name and can be used to set up specific endpoints for incoming connections.
+
+The following parameters can be configured on each transport profile, as in the example above:
+
+* `port`: The port to which to bind.
+* `bind_host`: The host to which to bind.
+* `publish_host`: The host which is published in informational APIs.
+
+Profiles also support all the other transport settings specified in the [transport settings](#transport-settings) section, and use these as defaults. For example, `transport.profiles.client.tcp.reuse_address` can be explicitly configured, and defaults otherwise to `transport.tcp.reuse_address`.
+
+
+### Long-lived idle connections [long-lived-connections]
+
+A transport connection between two nodes is made up of a number of long-lived TCP connections, some of which may be idle for an extended period of time. Nonetheless, {{es}} requires these connections to remain open, and it can disrupt the operation of your cluster if any inter-node connections are closed by an external influence such as a firewall. It is important to configure your network to preserve long-lived idle connections between {{es}} nodes, for instance by leaving `*.tcp.keep_alive` enabled and ensuring that the keepalive interval is shorter than any timeout that might cause idle connections to be closed, or by setting `transport.ping_schedule` if keepalives cannot be configured. Devices which drop connections when they reach a certain age are a common source of problems for {{es}} clusters, and must not be used.
+
+For information about troubleshooting unexpected network disconnections, see [Diagnosing other network disconnections](docs-content://troubleshoot/elasticsearch/troubleshooting-unstable-cluster.md#troubleshooting-unstable-cluster-network).
+
+
+### Request compression [request-compression]
+
+The default `transport.compress` configuration option `indexing_data` will only compress requests that relate to the transport of raw indexing source data between nodes. This option primarily compresses data sent during ingest, CCR, and shard recovery. This default normally makes sense for local cluster communication as compressing raw documents tends to significantly reduce inter-node network usage with minimal CPU impact.
+
+The `transport.compress` setting always configures local cluster request compression and is the fallback setting for remote cluster request compression. If you want to configure remote request compression differently than local request compression, you can set it on a per-remote cluster basis using the [`cluster.remote.${cluster_alias}.transport.compress` setting](docs-content://deploy-manage/remote-clusters/remote-clusters-settings.md).
+
+
+### Response compression [response-compression]
+
+The compression settings do not configure compression for responses. {{es}} will compress a response if the inbound request was compressed—even when compression is not enabled. 
Similarly, {{es}} will not compress a response if the inbound request was uncompressed—​even when compression is enabled. The compression scheme used to compress a response will be the same scheme the remote node used to compress the request. + + + +## Advanced remote cluster (API key based model) settings [remote-cluster-network-settings] + +Use the following advanced settings to configure the remote cluster interface (API key based model) independently of the [transport interface](#transport-settings). You can also configure both interfaces together using the [network settings](#common-network-settings). + +`remote_cluster_server.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Determines whether the remote cluster server should be enabled. This setting must be `true` for `remote_cluster.port` and all following remote cluster settings to take effect. Enabling it allows the cluster to serve cross-cluster requests using the API key based model. Defaults to `false`. + +`remote_cluster.host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) Sets the address of this node for remote cluster server traffic. The node will bind to this address and will also use it as its remote cluster server publish address. Accepts an IP address, a hostname, or a [special value](#network-interface-values). Use this setting only if you require different configurations for the remote cluster server and transport interfaces. + + Defaults to the address given by `transport.bind_host`. + + +`remote_cluster.bind_host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) The network address(es) to which the node should bind in order to listen for incoming remote cluster connections. Accepts a list of IP addresses, hostnames, and [special values](#network-interface-values). Defaults to the address given by `remote_cluster.host`. Use this setting only if you require to bind to multiple addresses or to use different addresses for publishing and binding, and you also require different binding configurations for the remote cluster server and transport interfaces. + +`remote_cluster.publish_host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), string) The network address at which the node can be contacted by other nodes. Accepts an IP address, a hostname, or a [special value](#network-interface-values). Defaults to the address given by `remote_cluster.host`. Use this setting only if you require to bind to multiple addresses or to use different addresses for publishing and binding, and you also require different binding configurations for the remote cluster server and transport interfaces. + +`remote_cluster.publish_port` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) The port of the [remote cluster server publish address](#modules-network-binding-publishing). Set this parameter only if you need the publish port to be different from `remote_cluster.port`. Defaults to the port assigned via `remote_cluster.port`. 
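+
+As an illustrative sketch (addresses and hostnames are placeholders), a node could enable the remote cluster server and advertise a dedicated address for it in `elasticsearch.yml`:
+
+```yaml
+# Hypothetical values: serve API-key-based cross-cluster traffic on its own
+# interface, separate from the transport interface.
+remote_cluster_server.enabled: true
+remote_cluster.host: 0.0.0.0
+remote_cluster.publish_host: rcs.example.com
+```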
+ +`remote_cluster.tcp.keep_alive` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures the `SO_KEEPALIVE` option for remote cluster sockets, which determines whether they send TCP keepalive probes. Defaults to `transport.tcp.keep_alive`. + +`remote_cluster.tcp.keep_idle` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Configures the `TCP_KEEPIDLE` option for transport sockets, which determines the time in seconds that a connection must be idle before starting to send TCP keepalive probes. Defaults to `transport.tcp.keep_idle` if set, or the system default otherwise. This value cannot exceed `300` seconds. In cases where the system default is higher than `300`, the value is automatically lowered to `300`. Only applicable on Linux and macOS. + +`remote_cluster.tcp.keep_interval` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Configures the `TCP_KEEPINTVL` option for transport sockets, which determines the time in seconds between sending TCP keepalive probes. Defaults to `transport.tcp.keep_interval` if set, or the system default otherwise. This value cannot exceed `300` seconds. In cases where the system default is higher than `300`, the value is automatically lowered to `300`. Only applicable on Linux and macOS. + +`remote_cluster.tcp.keep_count` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Configures the `TCP_KEEPCNT` option for transport sockets, which determines the number of unacknowledged TCP keepalive probes that may be sent on a connection before it is dropped. Defaults to `transport.tcp.keep_count` if set, or the system default otherwise. Only applicable on Linux and macOS. + +`remote_cluster.tcp.no_delay` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures the `TCP_NODELAY` option on transport sockets, which determines whether [TCP no delay](https://en.wikipedia.org/wiki/Nagle%27s_algorithm) is enabled. Defaults to `transport.tcp.no_delay`. + +`remote_cluster.tcp.reuse_address` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), boolean) Configures the `SO_REUSEADDR` option for network sockets, which determines whether the address can be reused or not. Defaults to `transport.tcp.reuse_address`. + +`remote_cluster.tcp.send_buffer_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units)) The size of the TCP send buffer for transport traffic. Defaults to `transport.tcp.send_buffer_size`. + +`remote_cluster.tcp.receive_buffer_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [byte value](/reference/elasticsearch/rest-apis/api-conventions.md#byte-units)) The size of the TCP receive buffer for transport traffic. Defaults to `transport.tcp.receive_buffer_size`. + + +## Request tracing [_request_tracing] + +You can trace individual requests made on the HTTP and transport layers. + +::::{warning} +Tracing can generate extremely high log volumes that can destabilize your cluster. 
Do not enable request tracing on busy or important clusters. +:::: + + +### REST request tracer [http-rest-request-tracer] + +The HTTP layer has a dedicated tracer that logs incoming requests and the corresponding outgoing responses. Activate the tracer by setting the level of the `org.elasticsearch.http.HttpTracer` logger to `TRACE`: + +```console +PUT _cluster/settings +{ + "persistent" : { + "logger.org.elasticsearch.http.HttpTracer" : "TRACE" + } +} +``` + +You can also control which URIs will be traced, using a set of include and exclude wildcard patterns. By default every request will be traced. + +```console +PUT _cluster/settings +{ + "persistent" : { + "http.tracer.include" : "*", + "http.tracer.exclude" : "" + } +} +``` + +By default, the tracer logs a summary of each request and response which matches these filters. To record the body of each request and response too, set the system property `es.insecure_network_trace_enabled` to `true`, and then set the levels of both the `org.elasticsearch.http.HttpTracer` and `org.elasticsearch.http.HttpBodyTracer` loggers to `TRACE`: + +```console +PUT _cluster/settings +{ + "persistent" : { + "logger.org.elasticsearch.http.HttpTracer" : "TRACE", + "logger.org.elasticsearch.http.HttpBodyTracer" : "TRACE" + } +} +``` + +Each message body is compressed, encoded, and split into chunks to avoid truncation: + +```text +[TRACE][o.e.h.HttpBodyTracer ] [master] [276] response body [part 1]: H4sIAAAAAAAA/9... +[TRACE][o.e.h.HttpBodyTracer ] [master] [276] response body [part 2]: 2oJ93QyYLWWhcD... +[TRACE][o.e.h.HttpBodyTracer ] [master] [276] response body (gzip compressed, base64-encoded, and split into 2 parts on preceding log lines) +``` + +Each chunk is annotated with an internal request ID (`[276]` in this example) which you should use to correlate the chunks with the corresponding summary lines. To reconstruct the output, base64-decode the data and decompress it using `gzip`. For instance, on Unix-like systems: + +```sh +cat httptrace.log | sed -e 's/.*://' | base64 --decode | gzip --decompress +``` + +::::{warning} +HTTP request and response bodies may contain sensitive information such as credentials and keys, so HTTP body tracing is disabled by default. You must explicitly enable it on each node by setting the system property `es.insecure_network_trace_enabled` to `true`. This feature is primarily intended for test systems which do not contain any sensitive information. If you set this property on a system which contains sensitive information, you must protect your logs from unauthorized access. +:::: + + + +### Transport tracer [transport-tracer] + +The transport layer has a dedicated tracer that logs incoming and outgoing requests and responses. Activate the tracer by setting the level of the `org.elasticsearch.transport.TransportService.tracer` logger to `TRACE`: + +```console +PUT _cluster/settings +{ + "persistent" : { + "logger.org.elasticsearch.transport.TransportService.tracer" : "TRACE" + } +} +``` + +You can also control which actions will be traced, using a set of include and exclude wildcard patterns. By default every request will be traced except for fault detection pings: + +```console +PUT _cluster/settings +{ + "persistent" : { + "transport.tracer.include" : "*", + "transport.tracer.exclude" : "internal:coordination/fault_detection/*" + } +} +``` + + + +## Networking threading model [modules-network-threading-model] + +This section describes the threading model used by the networking subsystem in {{es}}. 
This information isn’t required to use {{es}}, but it may be useful to advanced users who are diagnosing network problems in a cluster. + +{{es}} nodes communicate over a collection of TCP channels that together form a transport connection. {{es}} clients communicate with the cluster over HTTP, which also uses one or more TCP channels. Each of these TCP channels is owned by exactly one of the `transport_worker` threads in the node. This owning thread is chosen when the channel is opened and remains the same for the lifetime of the channel. + +Each `transport_worker` thread has sole responsibility for sending and receiving data over the channels it owns. Additionally, each http and transport server socket is assigned to one of the `transport_worker` threads. That worker has the responsibility of accepting new incoming connections to the server socket it owns. + +If a thread in {{es}} wants to send data over a particular channel, it passes the data to the owning `transport_worker` thread for the actual transmission. + +Normally the `transport_worker` threads will not completely handle the messages they receive. Instead, they will do a small amount of preliminary processing and then dispatch (hand off) the message to a different [threadpool](/reference/elasticsearch/configuration-reference/thread-pool-settings.md) for the rest of their handling. For instance, bulk messages are dispatched to the `write` threadpool, searches are dispatched to one of the `search` threadpools, and requests for statistics and other management tasks are mostly dispatched to the `management` threadpool. However in some cases the processing of a message is expected to be so quick that {{es}} will do all of the processing on the `transport_worker` thread rather than incur the overhead of dispatching it elsewhere. + +By default, there is one `transport_worker` thread per CPU. In contrast, there may sometimes be tens-of-thousands of TCP channels. If data arrives on a TCP channel and its owning `transport_worker` thread is busy, the data isn’t processed until the thread finishes whatever it is doing. Similarly, outgoing data are not sent over a channel until the owning `transport_worker` thread is free. This means that we require every `transport_worker` thread to be idle frequently. 
An idle `transport_worker` looks something like this in a stack dump: + +```text +"elasticsearch[instance-0000000004][transport_worker][T#1]" #32 daemon prio=5 os_prio=0 cpu=9645.94ms elapsed=501.63s tid=0x00007fb83b6307f0 nid=0x1c4 runnable [0x00007fb7b8ffe000] + java.lang.Thread.State: RUNNABLE + at sun.nio.ch.EPoll.wait(java.base@17.0.2/Native Method) + at sun.nio.ch.EPollSelectorImpl.doSelect(java.base@17.0.2/EPollSelectorImpl.java:118) + at sun.nio.ch.SelectorImpl.lockAndDoSelect(java.base@17.0.2/SelectorImpl.java:129) + - locked <0x00000000c443c518> (a sun.nio.ch.Util$2) + - locked <0x00000000c38f7700> (a sun.nio.ch.EPollSelectorImpl) + at sun.nio.ch.SelectorImpl.select(java.base@17.0.2/SelectorImpl.java:146) + at io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:813) + at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:460) + at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:986) + at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) + at java.lang.Thread.run(java.base@17.0.2/Thread.java:833) +``` + +In the [Nodes hot threads](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-nodes-hot-threads) API an idle `transport_worker` thread is reported like this: + +```text + 0.0% [cpu=0.0%, idle=100.0%] (500ms out of 500ms) cpu usage by thread 'elasticsearch[instance-0000000004][transport_worker][T#1]' + 10/10 snapshots sharing following 9 elements + java.base@17.0.2/sun.nio.ch.EPoll.wait(Native Method) + java.base@17.0.2/sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:118) + java.base@17.0.2/sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:129) + java.base@17.0.2/sun.nio.ch.SelectorImpl.select(SelectorImpl.java:146) + io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:813) + io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:460) + io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:986) + io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) + java.base@17.0.2/java.lang.Thread.run(Thread.java:833) +``` + +Note that `transport_worker` threads should always be in state `RUNNABLE`, even when waiting for input, because they block in the native `EPoll#wait` method. The `idle=` time reports the proportion of time the thread spent waiting for input, whereas the `cpu=` time reports the proportion of time the thread spent processing input it has received. + +If a `transport_worker` thread is not frequently idle, it may build up a backlog of work. This can cause delays in processing messages on the channels that it owns. It’s hard to predict exactly which work will be delayed: + +* There are many more channels than threads. If work related to one channel is causing delays to its worker thread, all other channels owned by that thread will also suffer delays. +* The mapping from TCP channels to worker threads is fixed but arbitrary. Each channel is assigned an owning thread in a round-robin fashion when the channel is opened. Each worker thread is responsible for many different kinds of channel. +* There are many channels open between each pair of nodes. For each request, {{es}} will choose from the appropriate channels in a round-robin fashion. Some requests may end up on a channel owned by a delayed worker while other identical requests will be sent on a channel that’s working smoothly. + +If the backlog builds up too far, some messages may be delayed by many seconds. 
The node might even [fail its health checks](docs-content://deploy-manage/distributed-architecture/discovery-cluster-formation/cluster-fault-detection.md) and be removed from the cluster. Sometimes, you can find evidence of busy `transport_worker` threads using the [Nodes hot threads](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-nodes-hot-threads) API. However, this API itself sends network messages so may not work correctly if the `transport_worker` threads are too busy. It is more reliable to use `jstack` to obtain stack dumps or use Java Flight Recorder to obtain a profiling trace. These tools are independent of any work the JVM is performing. + +It may also be possible to identify some reasons for delays from the server logs. See for instance the following loggers: + +`org.elasticsearch.transport.InboundHandler` +: This logger reports a warning if processing an inbound message occupies a network thread for unreasonably long, which is almost certainly a bug. The warning includes some information which can be used to identify the message that took unreasonably long to process. + +`org.elasticsearch.transport.OutboundHandler` +: This logger reports a warning if sending an outbound message takes longer than expected. This duration includes time spent waiting for network congestion to clear, and time spent processing other work on the same network thread, so does not always indicate the presence of a bug related to the outbound message specified in the log entry. + +`org.elasticsearch.common.network.ThreadWatchdog` +: This logger reports a warning and a thread dump when it notices that a network thread has not made progress between two consecutive checks, which is almost certainly a bug: + + ```text + [WARN ][o.e.c.n.ThreadWatchdog ] the following threads are active but did not make progress in the preceding [5s]: [elasticsearch[instance-0000000004][transport_worker][T#1]]] + [WARN ][o.e.c.n.ThreadWatchdog ] hot threads dump due to active threads not making progress [part 1]: H4sIAAAAAAAA/+1aa2/bOBb93l8hYLUYFWgYvWw5AQbYpEkn6STZbJyiwAwGA1qiY8US6ZJUHvPr90qk/JJky41TtDMuUIci... + [WARN ][o.e.c.n.ThreadWatchdog ] hot threads dump due to active threads not making progress [part 2]: LfXL/x70a3eL8ve6Ral74ZBrp5x7HmUD9KXQz1MaXUNfFC6SeEysxSw1cNXL9JXYl3AigAE7ywbm/AZ+ll3Ox4qXJHNjVr6h... + [WARN ][o.e.c.n.ThreadWatchdog ] hot threads dump due to active threads not making progress (gzip compressed, base64-encoded, and split into 2 parts on preceding log lines; ... + ``` + + To reconstruct the thread dump, base64-decode the data and decompress it using `gzip`. For instance, on Unix-like systems: + + ```sh + cat watchdog.log | sed -e 's/.*://' | base64 --decode | gzip --decompress + ``` + + This mechanism can be controlled with the following settings: + + `network.thread.watchdog.interval` + : ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [time value](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) Defines the interval between watchdog checks. Defaults to `5s`. Set to `0` to disable the network thread watchdog. + + `network.thread.watchdog.quiet_time` + : ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), [time value](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) Defines the interval between watchdog warnings. Defaults to `10m`. 
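+
+    For example, both settings could be set in `elasticsearch.yml` (an illustrative sketch; the values are placeholders, not recommendations):
+
+    ```yaml
+    network.thread.watchdog.interval: 10s
+    network.thread.watchdog.quiet_time: 30m
+    ```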
+ + + +## TCP readiness port [tcp-readiness-port] + +::::{warning} +This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features. +:::: + + +If configured, a node can open a TCP port when the node is in a ready state. A node is deemed ready when it has successfully joined a cluster. In a single node configuration, the node is said to be ready, when it’s able to accept requests. + +To enable the readiness TCP port, use the `readiness.port` setting. The readiness service will bind to all host addresses. + +If the node leaves the cluster, or the [Shutdown API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-shutdown-put-node) is used to mark the node for shutdown, the readiness port is immediately closed. + +A successful connection to the readiness TCP port signals that the {{es}} node is ready. When a client connects to the readiness port, the server simply terminates the socket connection. No data is sent back to the client. If a client cannot connect to the readiness port, the node is not ready. + + diff --git a/docs/reference/elasticsearch/configuration-reference/node-query-cache-settings.md b/docs/reference/elasticsearch/configuration-reference/node-query-cache-settings.md new file mode 100644 index 0000000000000..8105e163cc7b2 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/node-query-cache-settings.md @@ -0,0 +1,28 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/query-cache.html +--- + +# Node query cache settings [query-cache] + +The results of queries used in the filter context are cached in the node query cache for fast lookup. There is one query cache per node that is shared by all shards. The cache uses an LRU eviction policy: when the cache is full, the least recently used query results are evicted to make way for new data. You cannot inspect the contents of the query cache. + +Term queries and queries used outside of a filter context are not eligible for caching. + +By default, the cache holds a maximum of 10000 queries in up to 10% of the total heap space. To determine if a query is eligible for caching, {{es}} maintains a query history to track occurrences. + +Caching is done on a per segment basis if a segment contains at least 10000 documents and the segment has at least 3% of the total documents of a shard. Because caching is per segment, merging segments can invalidate cached queries. + +The following setting is *static* and must be configured on every data node in the cluster: + +`indices.queries.cache.size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the memory size for the filter cache. Accepts either a percentage value, like `5%`, or an exact value, like `512mb`. Defaults to `10%`. + +## Query cache index settings [query-cache-index-settings] + +The following setting is an *index* setting that can be configured on a per-index basis. Can only be set at index creation time or on a [closed index](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-open): + +`index.queries.cache.enabled` +: ([Static](/reference/elasticsearch/index-settings/index.md)) Controls whether to enable query caching. Accepts `true` (default) or `false`. 
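+
+As an illustrative sketch (the value is a placeholder, not a recommendation), the node-level cache size could be reduced in `elasticsearch.yml`:
+
+```yaml
+# Hypothetical value: limit the node query cache to 5% of the heap.
+indices.queries.cache.size: 5%
+```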
+ + diff --git a/docs/reference/elasticsearch/configuration-reference/node-settings.md b/docs/reference/elasticsearch/configuration-reference/node-settings.md new file mode 100644 index 0000000000000..3a9634a37fc6b --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/node-settings.md @@ -0,0 +1,131 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html +--- + +# Node settings [modules-node] + +Any time that you start an instance of {{es}}, you are starting a *node*. A collection of connected nodes is called a [cluster](/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings.md). If you are running a single node of {{es}}, then you have a cluster of one node. + +Every node in the cluster can handle [HTTP and transport](/reference/elasticsearch/configuration-reference/networking-settings.md) traffic by default. The transport layer is used exclusively for communication between nodes; the HTTP layer is used by REST clients. + +$$$modules-node-description$$$ +All nodes know about all the other nodes in the cluster and can forward client requests to the appropriate node. + +::::{tip} +The performance of an {{es}} node is often limited by the performance of the underlying storage. Review our recommendations for optimizing your storage for [indexing](docs-content://deploy-manage/production-guidance/optimize-performance/indexing-speed.md#indexing-use-faster-hardware) and [search](docs-content://deploy-manage/production-guidance/optimize-performance/search-speed.md#search-use-faster-hardware). +:::: + + +## Node name setting [node-name-settings] + +{{es}} uses `node.name` as a human-readable identifier for a particular instance of {{es}}. This name is included in the response of many APIs. The node name defaults to the hostname of the machine when {{es}} starts, but can be configured explicitly in `elasticsearch.yml`: + +```yaml +node.name: prod-data-2 +``` + + +## Node role settings [node-roles] + +You define a node’s roles by setting `node.roles` in `elasticsearch.yml`. If you set `node.roles`, the node is only assigned the roles you specify. If you don’t set `node.roles`, the node is assigned the following roles: + +* $$$master-node$$$`master` +* $$$data-node$$$`data` +* `data_content` +* `data_hot` +* `data_warm` +* `data_cold` +* `data_frozen` +* `ingest` +* $$$ml-node$$$`ml` +* `remote_cluster_client` +* $$$transform-node$$$`transform` + +The following additional roles are available: + +* `voting_only` + +$$$coordinating-only-node$$$If you leave `node.roles` unset, then the node is considered to be a [coordinating only node](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#coordinating-only-node-role). + +::::{important} +If you set `node.roles`, ensure you specify every node role your cluster needs. Every cluster requires the following node roles: + +* `master` +* + + `data_content` and `data_hot`
OR `data`
+
+
+Some {{stack}} features also require specific node roles:
+
+* {{ccs-cap}} and {{ccr}} require the `remote_cluster_client` role.
+* {{stack-monitor-app}} and ingest pipelines require the `ingest` role.
+* {{fleet}}, the {{security-app}}, and {{transforms}} require the `transform` role. The `remote_cluster_client` role is also required to use {{ccs}} with these features.
+* {{ml-cap}} features, such as {{anomaly-detect}}, require the `ml` role.
+
+::::
+
+
+As the cluster grows and in particular if you have large {{ml}} jobs or {{ctransforms}}, consider separating dedicated master-eligible nodes from dedicated data nodes, {{ml}} nodes, and {{transform}} nodes.
+
+To learn more about the available node roles, see [*Node roles*](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md).
+
+
+## Node data path settings [_node_data_path_settings]
+
+
+## `path.data` [data-path]
+
+Every data and master-eligible node requires access to a data directory where shards and index and cluster metadata will be stored. The `path.data` defaults to `$ES_HOME/data` but can be configured in the `elasticsearch.yml` config file as an absolute path or a path relative to `$ES_HOME`, as follows:
+
+```yaml
+path.data: /var/elasticsearch/data
+```
+
+Like all node settings, it can also be specified on the command line as:
+
+```sh
+./bin/elasticsearch -Epath.data=/var/elasticsearch/data
+```
+
+The contents of the `path.data` directory must persist across restarts, because this is where your data is stored. {{es}} requires the filesystem to act as if it were backed by a local disk, but this means that it will work correctly on properly-configured remote block devices (e.g. a SAN) and remote filesystems (e.g. NFS) as long as the remote storage behaves no differently from local storage. You can run multiple {{es}} nodes on the same filesystem, but each {{es}} node must have its own data path.
+
+::::{tip}
+When using the `.zip` or `.tar.gz` distributions, the `path.data` setting should be configured to locate the data directory outside the {{es}} home directory, so that the home directory can be deleted without deleting your data! The RPM and Debian distributions do this for you already.
+::::
+
+
+::::{warning}
+Don’t modify anything within the data directory or run processes that might interfere with its contents. If something other than {{es}} modifies the contents of the data directory, then {{es}} may fail, reporting corruption or other data inconsistencies, or may appear to work correctly having silently lost some of your data. Don’t attempt to take filesystem backups of the data directory; there is no supported way to restore such a backup. Instead, use [Snapshot and restore](docs-content://deploy-manage/tools/snapshot-and-restore.md) to take backups safely. Don’t run virus scanners on the data directory. A virus scanner can prevent {{es}} from working correctly and may modify the contents of the data directory. The data directory contains no executables so a virus scan will only find false positives.
+::::
+
+
+
+## Custom node attributes [custom-node-attributes]
+
+If needed, you can add custom attributes to a node. 
These attributes can be used to [filter which nodes a shard can be allocated to](/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings.md#cluster-routing-settings), or to group nodes together for [shard allocation awareness](docs-content://deploy-manage/distributed-architecture/shard-allocation-relocation-recovery/shard-allocation-awareness.md). + +::::{tip} +You can also set a node attribute using the `-E` command line argument when you start a node: + +```sh +./bin/elasticsearch -Enode.attr.rack_id=rack_one +``` + +:::: + + +`node.attr.` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A custom attribute that you can assign to a node. For example, you might assign a `rack_id` attribute to each node to ensure that primary and replica shards are not allocated on the same rack. You can specify multiple attributes as a comma-separated list. + + +## Other node settings [other-node-settings] + +More node settings can be found in [*Configuring {{es}}*](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md) and [Important {{es}} configuration](docs-content://deploy-manage/deploy/self-managed/important-settings-configuration.md), including: + +* [`cluster.name`](/reference/elasticsearch/configuration-reference/miscellaneous-cluster-settings.md#cluster-name) +* [`node.name`](docs-content://deploy-manage/deploy/self-managed/important-settings-configuration.md#node-name) +* [network settings](/reference/elasticsearch/configuration-reference/networking-settings.md) + + diff --git a/docs/reference/elasticsearch/configuration-reference/search-settings.md b/docs/reference/elasticsearch/configuration-reference/search-settings.md new file mode 100644 index 0000000000000..09290a4d82703 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/search-settings.md @@ -0,0 +1,61 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-settings.html +--- + +# Search settings [search-settings] + +The following expert settings can be set to manage global search and aggregation limits. + +$$$indices-query-bool-max-clause-count$$$ + +`indices.query.bool.max_clause_count` +: [8.0.0] ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) This deprecated setting has no effect. + + {{es}} will now dynamically set the maximum number of allowed clauses in a query, using a heuristic based on the size of the search thread pool and the size of the heap allocated to the JVM. This limit has a minimum value of 1024 and will in most cases be larger (for example, a node with 30Gb RAM and 48 CPUs will have a maximum clause count of around 27,000). Larger heaps lead to higher values, and larger thread pools result in lower values. + + Queries with many clauses should be avoided whenever possible. If you previously bumped this setting to accommodate heavy queries, you might need to increase the amount of memory available to {{es}}, or to reduce the size of your search thread pool so that more memory is available to each concurrent search. + + In previous versions of Lucene you could get around this limit by nesting boolean queries within each other, but the limit is now based on the total number of leaf queries within the query as a whole and this workaround will no longer help. 
+ + +$$$search-settings-max-buckets$$$ + +`search.max_buckets` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings), integer) Maximum number of [aggregation buckets](/reference/data-analysis/aggregations/bucket.md) allowed in a single response. Defaults to 65,536. + + Requests that attempt to return more than this limit will return an error. + + +$$$search-settings-only-allowed-scripts$$$ + +`search.aggs.only_allowed_metric_scripts` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings), boolean) Configures whether only explicitly allowed scripts can be used in [scripted metrics aggregations](/reference/data-analysis/aggregations/search-aggregations-metrics-scripted-metric-aggregation.md). Defaults to `false`. + + Requests using scripts not contained in either [`search.aggs.allowed_inline_metric_scripts`](/reference/elasticsearch/configuration-reference/search-settings.md#search-settings-allowed-inline-scripts) or [`search.aggs.allowed_stored_metric_scripts`](/reference/elasticsearch/configuration-reference/search-settings.md#search-settings-allowed-stored-scripts) will return an error. + + +$$$search-settings-allowed-inline-scripts$$$ + +`search.aggs.allowed_inline_metric_scripts` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings), list of strings) List of inline scripts that can be used in scripted metrics aggregations when [`search.aggs.only_allowed_metric_scripts`](#search-settings-only-allowed-scripts) is set to `true`. Defaults to an empty list. + + Requests using other inline scripts will return an error. + + +$$$search-settings-allowed-stored-scripts$$$ + +`search.aggs.allowed_stored_metric_scripts` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings), list of strings) List of ids of stored scripts that can be used in scripted metrics aggregations when [`search.aggs.only_allowed_metric_scripts`](#search-settings-only-allowed-scripts) is set to `true`. Defaults to an empty list. + + Requests using other stored scripts will return an error. + + +$$$indices-query-bool-max-nested-depth$$$ + +`indices.query.bool.max_nested_depth` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), integer) Maximum nested depth of queries. Defaults to `30`. + + This setting limits the nesting depth of queries. Deep nesting of queries may lead to stack overflow errors. + + diff --git a/docs/reference/elasticsearch/configuration-reference/security-settings.md b/docs/reference/elasticsearch/configuration-reference/security-settings.md new file mode 100644 index 0000000000000..af32520b797cc --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/security-settings.md @@ -0,0 +1,2392 @@ +--- +navigation_title: "Security settings" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/security-settings.html +--- + +# Security settings in {{es}} [security-settings] + + +You configure `xpack.security` settings to [enable anonymous access](#anonymous-access-settings) and perform message authentication, [set up document and field level security](#field-document-security-settings), [configure realms](#realm-settings), [encrypt communications with SSL](#ssl-tls-settings),and [audit security events](/reference/elasticsearch/configuration-reference/auding-settings.md). 
+ +All of these settings can be added to the `elasticsearch.yml` configuration file, with the exception of the secure settings, which you add to the {{es}} keystore. For more information about creating and updating the {{es}} keystore, see [Secure settings](docs-content://deploy-manage/security/secure-settings.md). + + +### General security settings [general-security-settings] + +`xpack.security.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Defaults to `true`, which enables {{es}} {{security-features}} on the node. This setting must be enabled to use Elasticsearch’s authentication, authorization and audit features.
+ + If set to `false`, {{security-features}} are disabled, which is not recommended. It also affects all {{kib}} instances that connect to this {{es}} instance; you do not need to disable {{security-features}} in those `kibana.yml` files. For more information about disabling {{security-features}} in specific {{kib}} instances, see [{{kib}} security settings](kibana://docs/reference/configuration-reference/security-settings.md). + + +`xpack.security.autoconfiguration.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Defaults to `true`, which enables [security auto configuration](docs-content://deploy-manage/deploy/self-managed/installing-elasticsearch.md). + + If set to `false`, security auto configuration is disabled, which is not recommended. When disabled, security is not configured automatically when starting {{es}} for the first time, which means that you must [manually configure security](docs-content://deploy-manage/security/manually-configure-security-in-self-managed-cluster.md). + + +`xpack.security.enrollment.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Defaults to `false`. Controls enrollment (of nodes and {{kib}}) to a local node that’s been [autoconfigured for security](docs-content://deploy-manage/deploy/self-managed/installing-elasticsearch.md). When set to `true`, the local node can generate new enrollment tokens. Existing tokens can be used for enrollment if they are still valid. + + The security autoconfiguration process will set this to `true` unless an administrator sets it to `false` before starting {{es}}. + + +`xpack.security.hide_settings` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A comma-separated list of settings that are omitted from the results of the [cluster nodes info API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-nodes-info). You can use wildcards to include multiple settings in the list. For example, the following value hides all the settings for the ad1 active_directory realm: `xpack.security.authc.realms.active_directory.ad1.*`. The API already omits all `ssl` settings, `bind_dn`, and `bind_password` due to the sensitive nature of the information. + +`xpack.security.fips_mode.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Enables fips mode of operation. Set this to `true` if you run this {{es}} instance in a FIPS 140-2 enabled JVM. For more information, see [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md). Defaults to `false`. + +`xpack.security.fips_mode.required_providers` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Optionally enforce specific Java JCE/JSSE security providers. For example, set this to `["BCFIPS", "BCJSSE"]` (case-insensitive) to require the Bouncy Castle FIPS JCE and JSSE security providers. Only applicable when `xpack.security.fips_mode.enabled` is set to `true`. + + +### Password hashing settings [password-hashing-settings] + +`xpack.security.authc.password_hashing.algorithm` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the hashing algorithm that is used for secure user credential storage. 
See [Table 2, Password hashing algorithms](#password-hashing-algorithms). If `xpack.security.fips_mode.enabled` is `true` (see [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md)), defaults to `pbkdf2_stretch`. In all other cases, defaults to `bcrypt`.
+
+
+### Anonymous access settings [anonymous-access-settings]
+
+You can configure the following anonymous access settings in `elasticsearch.yml`. For more information, see [Enabling anonymous access](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/anonymous-access.md).
+
+`xpack.security.authc.anonymous.username`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The username (principal) of the anonymous user. Defaults to `_es_anonymous_user`.
+
+`xpack.security.authc.anonymous.roles`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The roles to associate with the anonymous user. Required.
+
+`xpack.security.authc.anonymous.authz_exception`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) When `true`, an HTTP 403 response is returned if the anonymous user does not have the appropriate permissions for the requested action. The user is not prompted to provide credentials to access the requested resource. When set to `false`, an HTTP 401 response is returned and the user can provide credentials with the appropriate permissions to gain access. Defaults to `true`.
+
+
+### Automata settings [security-automata-settings]
+
+In places where the {{security-features}} accept wildcard patterns (e.g. index patterns in roles, group matches in the role mapping API), each pattern is compiled into an automaton. The following settings are available to control this behaviour.
+
+`xpack.security.automata.max_determinized_states`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The upper limit on how many automaton states may be created by a single pattern. This protects against too-difficult (e.g. exponentially hard) patterns. Defaults to `100,000`.
+
+`xpack.security.automata.cache.enabled`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Whether to cache the compiled automata. Compiling automata can be CPU intensive and may slow down some operations. The cache reduces the frequency with which automata need to be compiled. Defaults to `true`.
+
+`xpack.security.automata.cache.size`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum number of items to retain in the automata cache. Defaults to `10,000`.
+
+`xpack.security.automata.cache.ttl`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The length of time to retain an item in the automata cache (based on most recent usage). Defaults to `48h` (48 hours).
+
+
+### Document and field level security settings [field-document-security-settings]
+
+You can set the following document and field level security settings in `elasticsearch.yml`. For more information, see [Setting up field and document level security](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/controlling-access-at-document-field-level.md).
+ +`xpack.security.dls_fls.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set to `false` to prevent document and field level security from being configured. Defaults to `true`. + +`xpack.security.dls.bitset.cache.ttl` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The time-to-live for cached `BitSet` entries for document level security. Document level security queries may depend on Lucene BitSet objects, and these are automatically cached to improve performance. Defaults to expire entries that are unused for `2h` (2 hours). + +`xpack.security.dls.bitset.cache.size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum memory usage of cached `BitSet` entries for document level security. Document level security queries may depend on Lucene BitSet objects, and these are automatically cached to improve performance. Can be configured as a raw number of bytes (such as `200mb` or `1g`) or a percentage of the node’s JVM heap memory (such as `5%`). When the default value is exceeded, the least recently used entries are evicted. Defaults to `10%` of the heap assigned to the node. + + +### Token service settings [token-service-settings] + +You can set the following token service settings in `elasticsearch.yml`. + +`xpack.security.authc.token.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set to `false` to disable the built-in token service. Defaults to `true` unless `xpack.security.http.ssl.enabled` is `false`. This prevents sniffing the token from a connection over plain http. + +`xpack.security.authc.token.timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The length of time that a token is valid for. By default this value is `20m` or 20 minutes. The maximum value is 1 hour. + + +### API key service settings [api-key-service-settings] + +You can set the following API key service settings in `elasticsearch.yml`. + +`xpack.security.authc.api_key.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set to `false` to disable the built-in API key service. Defaults to `true`. + +`xpack.security.authc.api_key.cache.ttl` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The time-to-live for cached API key entries. A API key id and a hash of its API key are cached for this period of time. Specify the time period using the standard {{es}} [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). Defaults to `1d`. + +`xpack.security.authc.api_key.cache.max_keys` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum number of API key entries that can live in the cache at any given time. Defaults to 10,000. + +`xpack.security.authc.api_key.cache.hash_algo` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), Expert) The hashing algorithm that is used for the in-memory cached API key credentials. For possible values, see [Table 1, Cache hash algorithms](#cache-hash-algo). Defaults to `ssha256`. 
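+
+For illustration only, a minimal `elasticsearch.yml` sketch that tunes the API key cache using the settings above (the values are arbitrary examples, not recommendations):
+
+```yaml
+xpack.security.authc.api_key:
+  enabled: true
+  cache.ttl: 12h        # example value; the default is 1d
+  cache.max_keys: 25000 # example value; the default is 10,000
+```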
+ +$$$api-key-service-settings-delete-retention-period$$$ + +`xpack.security.authc.api_key.delete.retention_period` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Invalidated or expired API keys older than the retention period are eligible for deletion. Defaults to `7d`. + +::::{note} +Large real-time clock inconsistency across cluster nodes can cause problems with evaluating the API key retention period. That is, if the clock on the node invalidating the API key is significantly different than the one performing the deletion, the key may be retained for longer or shorter than the configured retention period. +:::: + + +`xpack.security.authc.api_key.delete.interval` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), Expert) Cluster nodes schedule the automatic deletion of invalidated or expired API keys that are older than the retention period. This setting controls the minimum time interval between two such deletion jobs. Defaults to `24h`. + + ::::{note} + This is a low-level setting that currently controls the interval between deletion jobs triggered per-node, not across the cluster. + :::: + + +`xpack.security.authc.api_key.delete.timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), Expert) Sets the timeout of the internal search and delete call. + +`xpack.security.authc.api_key.hashing.algorithm` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the hashing algorithm that is used for securing API key credentials. See [Table 3, Secure token hashing algorithms](#secure-token-hashing-algorithms). Defaults to `ssha256`. + + +### Security domain settings [security-domain-settings] + +You configure security domain settings in the `xpack.security.authc.domains` namespace in `elasticsearch.yml`. + +For example: + +```yaml +xpack: + security: + authc: + domains: + my_domain: <1> + realms: [ 'default_native', 'saml1' ] <2> +``` + +1. Specifies the name of the security domain +2. Specifies the realms that belong to the domain + + + +### Realm settings [realm-settings] + +You configure realm settings in the `xpack.security.authc.realms` namespace in `elasticsearch.yml`. + +For example: + +```yaml +xpack.security.authc.realms: + + native.realm1: <1> + order: 0 <2> + ... + + ldap.realm2: + order: 1 + ... + + active_directory.realm3: + order: 2 + ... + ... +``` + +1. Specifies the type of realm (for example, `native`, `ldap`, `active_directory`, `pki`, `file`, `kerberos`, `saml`) and the realm name. This information is required. +2. Specifies priority of a realm in the realm chain. This information is required. + + +The valid settings vary depending on the realm type. For more information, see [*User authentication*](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/user-authentication.md). + + +#### Settings valid for all realms [ref-realm-settings] + +`order` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The priority of the realm within the realm chain. Realms with a lower order are consulted first. The value must be unique for each realm. This setting is required. + +`enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Indicates whether a realm is enabled. 
You can use this setting to disable a realm without removing its configuration information. Defaults to `true`.
+
+
+#### Native realm settings [ref-native-settings]
+
+In addition to the [settings that are valid for all realms](#ref-realm-settings), you can specify the following optional settings:
+
+`cache.ttl`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The time-to-live for cached user entries. A user and a hash of its credentials are cached for this period of time. Specify the time period using the standard {{es}} [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). Defaults to `20m`.
+
+`cache.max_users`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum number of user entries that can live in the cache at any given time. Defaults to 100,000.
+
+`cache.hash_algo`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), Expert) The hashing algorithm that is used for the in-memory cached user credentials. For possible values, see [Table 1, Cache hash algorithms](#cache-hash-algo). Defaults to `ssha256`.
+
+`authentication.enabled`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) If set to `false`, disables authentication support in this realm, so that it only supports user lookups. (See the [run as](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/submitting-requests-on-behalf-of-other-users.md) and [authorization realms](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/realm-chains.md#authorization_realms) features). Defaults to `true`.
+
+
+#### File realm settings [ref-users-settings]
+
+In addition to the [settings that are valid for all realms](#ref-realm-settings), you can specify the following settings:
+
+`cache.ttl`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The time-to-live for cached user entries. A user and a hash of its credentials are cached for this period of time. Specify the time period using the standard {{es}} [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). Defaults to `20m`.
+
+`cache.max_users`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum number of user entries that can live in the cache at a given time. Defaults to 100,000.
+
+`cache.hash_algo`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), Expert) The hashing algorithm that is used for the in-memory cached user credentials. See [Table 1, Cache hash algorithms](#cache-hash-algo). Defaults to `ssha256`.
+
+`authentication.enabled`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) If set to `false`, disables authentication support in this realm, so that it only supports user lookups. (See the [run as](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/submitting-requests-on-behalf-of-other-users.md) and [authorization realms](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/realm-chains.md#authorization_realms) features). Defaults to `true`.
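+
+As an illustration, the cache settings above might be applied to a native realm in `elasticsearch.yml` as sketched below (the realm name `native1` and the values are placeholders, not defaults you must use):
+
+```yaml
+xpack.security.authc.realms:
+  native.native1:
+    order: 0
+    cache.ttl: 10m          # example value; the default is 20m
+    cache.max_users: 50000  # example value; the default is 100,000
+```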
+ + +#### LDAP realm settings [ref-ldap-settings] + +In addition to the [Settings valid for all realms](#ref-realm-settings), you can specify the following settings: + +`url` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) One or more LDAP URLs in the `ldap[s]://:` format. Required. + + To provide multiple URLs, use a YAML array (`["ldap://server1:636", "ldap://server2:636"]`) or comma-separated string (`"ldap://server1:636, ldap://server2:636"`). + + While both are supported, you can’t mix the `ldap` and `ldaps` protocols. + + +`load_balance.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The behavior to use when there are multiple LDAP URLs defined. For supported values see [load balancing and failover types](#load-balancing). Defaults to `failover`. + +`load_balance.cache_ttl` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) When using `dns_failover` or `dns_round_robin` as the load balancing type, this setting controls the amount of time to cache DNS lookups. Defaults to `1h`. + +`bind_dn` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The DN of the user that is used to bind to the LDAP and perform searches. Only applicable in user search mode. If not specified, an anonymous bind is attempted. Defaults to Empty. Due to its potential security impact, `bind_dn` is not exposed via the [nodes info API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-nodes-info). + +`bind_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [6.3] Use `secure_bind_password` instead. The password for the user that is used to bind to the LDAP directory. Defaults to Empty. Due to its potential security impact, `bind_password` is not exposed via the [nodes info API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-nodes-info). + +`secure_bind_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md), [Reloadable](docs-content://deploy-manage/security/secure-settings.md#reloadable-secure-settings)) The password for the user that is used to bind to the LDAP directory. Defaults to Empty. + +`user_dn_templates` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The DN template that replaces the user name with the string `{{0}}`. This setting is multivalued; you can specify multiple user contexts. Required to operate in user template mode. If `user_search.base_dn` is specified, this setting is not valid. For more information on the different modes, see [LDAP user authentication](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/ldap.md). + +`authorization_realms` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The names of the realms that should be consulted for delegated authorization. If this setting is used, then the LDAP realm does not perform role mapping and instead loads the user from the listed realms. The referenced realms are consulted in the order that they are defined in this list. See [Delegating authorization to another realm](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/realm-chains.md#authorization_realms). 
+ + ::::{note} + If any settings starting with `user_search` are specified, the `user_dn_templates` settings are ignored. + :::: + + +`user_group_attribute` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the attribute to examine on the user for group membership. If any `group_search` settings are specified, this setting is ignored. Defaults to `memberOf`. + +`user_full_name_attribute` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the attribute to examine on the user for the full name of the user. Defaults to `cn`. + +`user_email_attribute` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the attribute to examine on the user for the email address of the user. Defaults to `mail`. + +`user_search.base_dn` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies a container DN to search for users. Required to operated in user search mode. If `user_dn_templates` is specified, this setting is not valid. For more information on the different modes, see [LDAP user authentication](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/ldap.md). + +`user_search.scope` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The scope of the user search. Valid values are `sub_tree`, `one_level` or `base`. `one_level` only searches objects directly contained within the `base_dn`. `sub_tree` searches all objects contained under `base_dn`. `base` specifies that the `base_dn` is the user object, and that it is the only user considered. Defaults to `sub_tree`. + +`user_search.filter` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the filter used to search the directory in attempts to match an entry with the username provided by the user. Defaults to `(uid={{0}})`. `{{0}}` is substituted with the username provided when searching. + +`user_search.attribute` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [5.6] Use `user_search.filter` instead. The attribute to match with the username sent with the request. Defaults to `uid`. + +`user_search.pool.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Enables or disables connection pooling for user search. If set to `false`, a new connection is created for every search. The default is `true` when `bind_dn` is set. + +`user_search.pool.size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum number of connections to the LDAP server to allow in the connection pool. Defaults to `20`. + +`user_search.pool.initial_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The initial number of connections to create to the LDAP server on startup. Defaults to `0`. If the LDAP server is down, values greater than `0` could cause startup failures. 
+ +`user_search.pool.health_check.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Enables or disables a health check on LDAP connections in the connection pool. Connections are checked in the background at the specified interval. Defaults to `true`. + +`user_search.pool.health_check.dn` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The distinguished name that is retrieved as part of the health check. Defaults to the value of `bind_dn` if present; if not, falls back to `user_search.base_dn`. + +`user_search.pool.health_check.interval` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The interval to perform background checks of connections in the pool. Defaults to `60s`. + +`group_search.base_dn` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The container DN to search for groups in which the user has membership. When this element is absent, {{es}} searches for the attribute specified by `user_group_attribute` set on the user in order to determine group membership. + +`group_search.scope` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies whether the group search should be `sub_tree`, `one_level` or `base`. `one_level` only searches objects directly contained within the `base_dn`. `sub_tree` searches all objects contained under `base_dn`. `base` specifies that the `base_dn` is a group object, and that it is the only group considered. Defaults to `sub_tree`. + +`group_search.filter` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies a filter to use to look up a group. When not set, the realm searches for `group`, `groupOfNames`, `groupOfUniqueNames`, or `posixGroup` with the attributes `member`, `memberOf`, or `memberUid`. Any instance of `{{0}}` in the filter is replaced by the user attribute defined in `group_search.user_attribute`. + +`group_search.user_attribute` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the user attribute that is fetched and provided as a parameter to the filter. If not set, the user DN is passed into the filter. Defaults to Empty. + +`unmapped_groups_as_roles` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) If set to `true`, the names of any unmapped LDAP groups are used as role names and assigned to the user. A group is considered to be *unmapped* if it is not referenced in a [role-mapping file](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/mapping-users-groups-to-roles.md#mapping-roles-file). API-based role mappings are not considered. Defaults to `false`. + +`files.role_mapping` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The [location](docs-content://deploy-manage/security.md) for the [YAML role mapping configuration file](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/mapping-users-groups-to-roles.md). Defaults to `ES_PATH_CONF/role_mapping.yml`. 
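+
+To tie several of the preceding settings together, here is a rough sketch of an LDAP realm operating in user search mode (the realm name `ldap1`, server address, and DNs are placeholders; the bind password belongs in the {{es}} keystore as `secure_bind_password`):
+
+```yaml
+xpack.security.authc.realms:
+  ldap.ldap1:
+    order: 1
+    url: "ldaps://ldap.example.com:636"
+    bind_dn: "cn=es-bind,dc=example,dc=com"       # do not put the bind password in this file
+    user_search.base_dn: "ou=people,dc=example,dc=com"
+    group_search.base_dn: "ou=groups,dc=example,dc=com"
+    unmapped_groups_as_roles: false
+```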
+ +`follow_referrals` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies whether {{es}} should follow referrals returned by the LDAP server. Referrals are URLs returned by the server that are to be used to continue the LDAP operation (for example, search). Defaults to `true`. + +`metadata` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A list of additional LDAP attributes that should be loaded from the LDAP server and stored in the authenticated user’s metadata field. + +`timeout.tcp_connect` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The TCP connect timeout period for establishing an LDAP connection. An `s` at the end indicates seconds, or `ms` indicates milliseconds. Defaults to `5s` (5 seconds ). + +`timeout.tcp_read` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.7] The TCP read timeout period after establishing an LDAP connection. This is equivalent to and is deprecated in favor of `timeout.response` and they cannot be used simultaneously. An `s` at the end indicates seconds, or `ms` indicates milliseconds. + +`timeout.response` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The time interval to wait for the response from the LDAP server. An `s` at the end indicates seconds, or `ms` indicates milliseconds. Defaults to the value of `timeout.ldap_search`. + +`timeout.ldap_search` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The timeout period for an LDAP search. The value is specified in the request and is enforced by the receiving LDAP Server. An `s` at the end indicates seconds, or `ms` indicates milliseconds. Defaults to `5s` (5 seconds ). + +`ssl.key` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Path to a PEM encoded file containing the private key. + + If HTTP client authentication is required, it uses this file. You cannot use this setting and `ssl.keystore.path` at the same time. + + If the LDAP server requires client authentication, it uses this file. You cannot use this setting and `ssl.keystore.path` at the same time. + + +`ssl.key_passphrase` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. [7.17.0] Prefer `ssl.secure_key_passphrase` instead. + + You cannot use this setting and `ssl.secure_key_passphrase` at the same time. + + +`ssl.secure_key_passphrase` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. + +`ssl.certificate` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path for the PEM encoded certificate (or certificate chain) that is associated with the key. + + This setting can be used only if `ssl.key` is set. + + This certificate is presented to clients when they connect. 
+ + +`ssl.certificate_authorities` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) List of paths to PEM encoded certificate files that should be trusted. + + This setting and `ssl.truststore.path` cannot be used at the same time. + + You cannot use this setting and `ssl.truststore.path` at the same time. + + +`ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + You cannot use this setting and `ssl.key` at the same time. + + +`ssl.keystore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the keystore file. It must be either `jks` or `PKCS12`. If the keystore path ends in ".p12", ".pfx", or ".pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. + +`ssl.keystore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the keystore. [7.17.0] Prefer `ssl.keystore.secure_password` instead. + +`ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + +`ssl.keystore.key_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the key in the keystore. The default is the keystore password. [7.17.0] Prefer `ssl.keystore.secure_key_password` instead. + + You cannot use this setting and `ssl.keystore.secure_password` at the same time. + + +`ssl.keystore.secure_key_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the key in the keystore. The default is the keystore password. + +`ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`ssl.truststore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the truststore. [7.17.0] Prefer `ssl.truststore.secure_password` instead. + + You cannot use this setting and `ssl.truststore.secure_password` at the same time. + + +`ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + +`ssl.truststore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the truststore file. It must be either `jks` or `PKCS12`. If the file name ends in ".p12", ".pfx" or "pkcs12", the default is `PKCS12`. Otherwise, it defaults to `jks`. 
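+
+For example, one way (of several) to configure trust for an `ldaps` connection is with PEM CA files via the `ssl.certificate_authorities` setting described above; the paths below are placeholders:
+
+```yaml
+xpack.security.authc.realms:
+  ldap.ldap1:
+    ssl.certificate_authorities: [ "/path/to/ca1.crt", "/path/to/ca2.crt" ]
+```
+
+Remember that this option and `ssl.truststore.path` are mutually exclusive; pick one or the other.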
+ +`ssl.verification_mode` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Indicates the type of verification when using `ldaps` to protect against man in the middle attacks and certificate forgery. + + ::::{dropdown} Valid values + `full` + : Validates that the provided certificate: has an issue date that’s within the `not_before` and `not_after` dates; chains to a trusted Certificate Authority (CA); has a `hostname` or IP address that matches the names within the certificate. + + `certificate` + : Validates the provided certificate and verifies that it’s signed by a trusted authority (CA), but doesn’t check the certificate `hostname`. + + `none` + : Performs no certificate validation. + + ::::{important} + Setting certificate validation to `none` disables many security benefits of SSL/TLS, which is very dangerous. Only set this value if instructed by Elastic Support as a temporary diagnostic mechanism when attempting to resolve TLS errors. + :::: + + + :::: + + + Defaults to `full`. + + +`ssl.supported_protocols` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported protocols with versions. Valid protocols: `SSLv2Hello`, `SSLv3`, `TLSv1`, `TLSv1.1`, `TLSv1.2`, `TLSv1.3`. If the JVM’s SSL provider supports TLSv1.3, the default is `TLSv1.3,TLSv1.2,TLSv1.1`. Otherwise, the default is `TLSv1.2,TLSv1.1`. + + {{es}} relies on your JDK’s implementation of SSL and TLS. View [Supported SSL/TLS versions by JDK version](docs-content://deploy-manage/security/supported-ssltls-versions-by-jdk-version.md) for more information. + + ::::{note} + If `xpack.security.fips_mode.enabled` is `true`, you cannot use `SSLv2Hello` or `SSLv3`. See [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md). + :::: + + +`ssl.cipher_suites` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the cipher suites that should be supported when communicating with the LDAP server. Supported cipher suites vary depending on which version of Java you use. For example, for version 12 the default value is `TLS_AES_256_GCM_SHA384`, `TLS_AES_128_GCM_SHA256`, `TLS_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA`, `TLS_RSA_WITH_AES_256_GCM_SHA384`, `TLS_RSA_WITH_AES_128_GCM_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA256`, `TLS_RSA_WITH_AES_128_CBC_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA`, `TLS_RSA_WITH_AES_128_CBC_SHA`. + + For more information, see Oracle’s [Java Cryptography Architecture documentation](https://docs.oracle.com/en/java/javase/11/security/oracle-providers.md#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2). + + +`cache.ttl` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the time-to-live for cached user entries. A user and a hash of its credentials are cached for this period of time. 
Use the standard {{es}} [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). Defaults to `20m`. + +`cache.max_users` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the maximum number of user entries that the cache can contain. Defaults to `100000`. + +`cache.hash_algo` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), Expert) Specifies the hashing algorithm that is used for the in-memory cached user credentials. See [Table 1, Cache hash algorithms](#cache-hash-algo). Defaults to `ssha256`. + +`authentication.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) If set to `false`, disables authentication support in this realm, so that it only supports user lookups. (See the [run as](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/submitting-requests-on-behalf-of-other-users.md) and [authorization realms](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/realm-chains.md#authorization_realms) features). Defaults to `true`. + + +#### Active Directory realm settings [ref-ad-settings] + +In addition to the [settings that are valid for all realms](#ref-realm-settings), you can specify the following settings: + +`url` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) One or more LDAP URLs in the `ldap[s]://:` format. Defaults to `ldap://:389`. This setting is required when connecting using SSL/TLS or when using a custom port. + + To provide multiple URLs, use a YAML array (`["ldap://server1:636", "ldap://server2:636"]`) or comma-separated string (`"ldap://server1:636, ldap://server2:636"`). + + While both are supported, you can’t mix the `ldap` and `ldaps` protocols. + + If no URL is provided, {{es}} uses a default of `ldap://:389`. This default uses the `domain_name` setting value and assumes an unencrypted connection to port 389. + + +`load_balance.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The behavior to use when there are multiple LDAP URLs defined. For supported values see [load balancing and failover types](#load-balancing). Defaults to `failover`. + +`load_balance.cache_ttl` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) When using `dns_failover` or `dns_round_robin` as the load balancing type, this setting controls the amount of time to cache DNS lookups. Defaults to `1h`. + +`domain_name` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The domain name of Active Directory. If the `url` and the `user_search.base_dn` settings are not specified, the cluster can derive those values from this setting. Required. + +`bind_dn` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The DN of the user that is used to bind to Active Directory and perform searches. Defaults to Empty. Due to its potential security impact, `bind_dn` is not exposed via the [nodes info API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-nodes-info). 
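+
+As a sketch, an Active Directory realm that relies on `domain_name` to derive the URL and user search base might look like the following (the realm name `ad1`, domain, and DN are placeholders):
+
+```yaml
+xpack.security.authc.realms:
+  active_directory.ad1:
+    order: 2
+    domain_name: ad.example.com
+    bind_dn: "cn=es-bind,dc=ad,dc=example,dc=com"  # store the bind password in the Elasticsearch keystore, not here
+```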
+ +`bind_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [6.3] Use `secure_bind_password` instead. The password for the user that is used to bind to Active Directory. Defaults to Empty. Due to its potential security impact, `bind_password` is not exposed via the [nodes info API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-nodes-info). + +`secure_bind_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md), [Reloadable](docs-content://deploy-manage/security/secure-settings.md#reloadable-secure-settings)) The password for the user that is used to bind to Active Directory. Defaults to Empty. + +`unmapped_groups_as_roles` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) If set to `true`, the names of any unmapped Active Directory groups are used as role names and assigned to the user. A group is considered *unmapped* when it is not referenced in any role-mapping files. API-based role mappings are not considered. Defaults to `false`. + +`files.role_mapping` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The [location](docs-content://deploy-manage/security.md) for the YAML role mapping configuration file. Defaults to `ES_PATH_CONF/role_mapping.yml`. + +`user_search.base_dn` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The context to search for a user. Defaults to the root of the Active Directory domain. + +`user_search.scope` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies whether the user search should be `sub_tree`, `one_level` or `base`. `one_level` only searches users directly contained within the `base_dn`. `sub_tree` searches all objects contained under `base_dn`. `base` specifies that the `base_dn` is a user object, and that it is the only user considered. Defaults to `sub_tree`. + +`user_search.filter` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies a filter to use to lookup a user given a username. The default filter looks up `user` objects with either `sAMAccountName` or `userPrincipalName`. If specified, this must be a valid LDAP user search filter. For example `(&(objectClass=user)(sAMAccountName={{0}}))`. For more information, see [Search Filter Syntax](https://msdn.microsoft.com/en-us/library/aa746475(v=vs.85).aspx). + +`user_search.upn_filter` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies a filter to use to lookup a user given a user principal name. The default filter looks up `user` objects with a matching `userPrincipalName`. If specified, this must be a valid LDAP user search filter. For example, `(&(objectClass=user)(userPrincipalName={{1}}))`. `{{1}}` is the full user principal name provided by the user. For more information, see [Search Filter Syntax](https://msdn.microsoft.com/en-us/library/aa746475(v=vs.85).aspx). + +`user_search.down_level_filter` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies a filter to use to lookup a user given a down level logon name (DOMAIN\user). 
The default filter looks up `user` objects with a matching `sAMAccountName` in the domain provided. If specified, this must be a valid LDAP user search filter. For example, `(&(objectClass=user)(sAMAccountName={{0}}))`. For more information, see [Search Filter Syntax](https://msdn.microsoft.com/en-us/library/aa746475(v=vs.85).aspx). + +`user_search.pool.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Enables or disables connection pooling for user search. When disabled a new connection is created for every search. The default is `true` when `bind_dn` is provided. + +`user_search.pool.size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum number of connections to the Active Directory server to allow in the connection pool. Defaults to `20`. + +`user_search.pool.initial_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The initial number of connections to create to the Active Directory server on startup. Defaults to `0`. If the LDAP server is down, values greater than 0 could cause startup failures. + +`user_search.pool.health_check.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Enables or disables a health check on Active Directory connections in the connection pool. Connections are checked in the background at the specified interval. Defaults to `true`. + +`user_search.pool.health_check.dn` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The distinguished name to be retrieved as part of the health check. Defaults to the value of `bind_dn` if that setting is present. Otherwise, it defaults to the value of the `user_search.base_dn` setting. + +`user_search.pool.health_check.interval` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The interval to perform background checks of connections in the pool. Defaults to `60s`. + +`group_search.base_dn` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The context to search for groups in which the user has membership. Defaults to the root of the Active Directory domain. + +`group_search.scope` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies whether the group search should be `sub_tree`, `one_level` or `base`. `one_level` searches for groups directly contained within the `base_dn`. `sub_tree` searches all objects contained under `base_dn`. `base` specifies that the `base_dn` is a group object, and that it is the only group considered. Defaults to `sub_tree`. + +`metadata` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A list of additional LDAP attributes that should be loaded from the LDAP server and stored in the authenticated user’s metadata field. + +`timeout.tcp_connect` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The TCP connect timeout period for establishing an LDAP connection. An `s` at the end indicates seconds, or `ms` indicates milliseconds. Defaults to `5s` (5 seconds ). 
+ +`timeout.tcp_read` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) [7.7] The TCP read timeout period after establishing an LDAP connection. This is equivalent to and is deprecated in favor of `timeout.response` and they cannot be used simultaneously. An `s` at the end indicates seconds, or `ms` indicates milliseconds. Defaults to the value of `timeout.ldap_search`. + +`timeout.response` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The time interval to wait for the response from the AD server. An `s` at the end indicates seconds, or `ms` indicates milliseconds. Defaults to the value of `timeout.ldap_search`. + +`timeout.ldap_search` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The timeout period for an LDAP search. The value is specified in the request and is enforced by the receiving LDAP Server. An `s` at the end indicates seconds, or `ms` indicates milliseconds. Defaults to `5s` (5 seconds ). + +`ssl.certificate` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path for the PEM encoded certificate (or certificate chain) that is associated with the key. + + This setting can be used only if `ssl.key` is set. + + This certificate is presented to clients when they connect. + + +`ssl.certificate_authorities` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) List of paths to PEM encoded certificate files that should be trusted. + + This setting and `ssl.truststore.path` cannot be used at the same time. + + You cannot use this setting and `ssl.truststore.path` at the same time. + + +`ssl.key` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Path to a PEM encoded file containing the private key. + + If HTTP client authentication is required, it uses this file. You cannot use this setting and `ssl.keystore.path` at the same time. + + If the Active Directory server requires client authentication, it uses this file. You cannot use this setting and `ssl.keystore.path` at the same time. + + +`ssl.key_passphrase` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. [7.17.0] Prefer `ssl.secure_key_passphrase` instead. + + You cannot use this setting and `ssl.secure_key_passphrase` at the same time. + + +`ssl.secure_key_passphrase` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. + +`ssl.keystore.key_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the key in the keystore. The default is the keystore password. [7.17.0] Prefer `ssl.keystore.secure_key_password` instead. + + You cannot use this setting and `ssl.keystore.secure_password` at the same time. + + +`ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. 
+ +`ssl.keystore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the keystore. [7.17.0] Prefer `ssl.keystore.secure_password` instead. + +`ssl.secure_keystore.password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + +`ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + You cannot use this setting and `ssl.key` at the same time. + + +`ssl.keystore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the keystore file. It must be either `jks` or `PKCS12`. If the keystore path ends in ".p12", ".pfx", or ".pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. + +`ssl.truststore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the truststore. [7.17.0] Prefer `ssl.truststore.secure_password` instead. + + You cannot use this setting and `ssl.truststore.secure_password` at the same time. + + +`ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + +`ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`ssl.truststore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the truststore file. It must be either `jks` or `PKCS12`. If the file name ends in ".p12", ".pfx" or "pkcs12", the default is `PKCS12`. Otherwise, it defaults to `jks`. + +`ssl.verification_mode` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Indicates the type of verification when using `ldaps` to protect against man in the middle attacks and certificate forgery. + + ::::{dropdown} Valid values + `full` + : Validates that the provided certificate: has an issue date that’s within the `not_before` and `not_after` dates; chains to a trusted Certificate Authority (CA); has a `hostname` or IP address that matches the names within the certificate. + + `certificate` + : Validates the provided certificate and verifies that it’s signed by a trusted authority (CA), but doesn’t check the certificate `hostname`. + + `none` + : Performs no certificate validation. + + ::::{important} + Setting certificate validation to `none` disables many security benefits of SSL/TLS, which is very dangerous. Only set this value if instructed by Elastic Support as a temporary diagnostic mechanism when attempting to resolve TLS errors. + :::: + + + :::: + + + Defaults to `full`. 
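+
+For instance, truststore-based trust with an explicit verification mode could be sketched as follows (the path is a placeholder, and the truststore password belongs in the {{es}} keystore as `ssl.truststore.secure_password`):
+
+```yaml
+xpack.security.authc.realms:
+  active_directory.ad1:
+    ssl.truststore.path: "/path/to/ad-truststore.p12"  # a .p12 suffix implies PKCS12 (see ssl.truststore.type)
+    ssl.verification_mode: full                        # the default, shown only for clarity
+```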
+ + +`ssl.supported_protocols` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported protocols with versions. Valid protocols: `SSLv2Hello`, `SSLv3`, `TLSv1`, `TLSv1.1`, `TLSv1.2`, `TLSv1.3`. If the JVM’s SSL provider supports TLSv1.3, the default is `TLSv1.3,TLSv1.2,TLSv1.1`. Otherwise, the default is `TLSv1.2,TLSv1.1`. + + {{es}} relies on your JDK’s implementation of SSL and TLS. View [Supported SSL/TLS versions by JDK version](docs-content://deploy-manage/security/supported-ssltls-versions-by-jdk-version.md) for more information. + + ::::{note} + If `xpack.security.fips_mode.enabled` is `true`, you cannot use `SSLv2Hello` or `SSLv3`. See [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md). + :::: + + +`ssl.cipher_suites` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the cipher suites that should be supported when communicating with the Active Directory server. Supported cipher suites vary depending on which version of Java you use. For example, for version 12 the default value is `TLS_AES_256_GCM_SHA384`, `TLS_AES_128_GCM_SHA256`, `TLS_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA`, `TLS_RSA_WITH_AES_256_GCM_SHA384`, `TLS_RSA_WITH_AES_128_GCM_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA256`, `TLS_RSA_WITH_AES_128_CBC_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA`, `TLS_RSA_WITH_AES_128_CBC_SHA`. + + For more information, see Oracle’s [Java Cryptography Architecture documentation](https://docs.oracle.com/en/java/javase/11/security/oracle-providers.md#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2). + + +`cache.ttl` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the time-to-live for cached user entries. A user and a hash of its credentials are cached for this configured period of time. Use the standard Elasticsearch [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)). Defaults to `20m`. + +`cache.max_users` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the maximum number of user entries that the cache can contain. Defaults to `100000`. + +`cache.hash_algo` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting), Expert) Specifies the hashing algorithm that is used for the in-memory cached user credentials. See [Table 1, Cache hash algorithms](#cache-hash-algo). Defaults to `ssha256`. + +`authentication.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) If set to `false`, disables authentication support in this realm, so that it only supports user lookups. 
+
+`follow_referrals`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) If set to `true`, {{es}} follows referrals returned by the LDAP server. Referrals are URLs returned by the server that are to be used to continue the LDAP operation (such as `search`). Defaults to `true`.
+
+
+#### PKI realm settings [ref-pki-settings]
+
+In addition to the [settings that are valid for all realms](#ref-realm-settings), you can specify the following settings:
+
+`username_pattern`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The regular expression pattern used to extract the username from the certificate DN. The username is used for auditing and logging. The username can also be used with the [role mapping API](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/mapping-users-groups-to-roles.md) and [authorization delegation](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/authorization-delegation.md). The first match group is used as the username. Defaults to `CN=(.*?)(?:,|$)`.
+
+`certificate_authorities`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) List of paths to the PEM certificate files that should be used to authenticate a user’s certificate as trusted. Defaults to the trusted certificates configured for SSL. This setting cannot be used with `truststore.path`.
+
+`truststore.algorithm`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Algorithm for the truststore. Defaults to `SunX509`.
+
+`truststore.password`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the truststore. [7.17.0] Prefer `truststore.secure_password` instead.
+
+    You cannot use this setting and `truststore.secure_password` at the same time.
+
+    If `truststore.path` is set, this setting is required.
+
+`truststore.secure_password`
+: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore.
+
+`truststore.path`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path of a truststore to use. Defaults to the trusted certificates configured for SSL. This setting cannot be used with `certificate_authorities`.
+
+`files.role_mapping`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the [location](docs-content://deploy-manage/security.md) of the [YAML role mapping configuration file](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/mapping-users-groups-to-roles.md). Defaults to `ES_PATH_CONF/role_mapping.yml`.
+
+`authorization_realms`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The names of the realms that should be consulted for delegated authorization. If this setting is used, then the PKI realm does not perform role mapping and instead loads the user from the listed realms. See [Delegating authorization to another realm](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/realm-chains.md#authorization_realms).
+
+`cache.ttl`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the time-to-live for cached user entries. A user and a hash of its credentials are cached for this period of time. Use the standard {{es}} [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). Defaults to `20m`.
+
+`cache.max_users`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the maximum number of user entries that the cache can contain. Defaults to `100000`.
+
+`delegation.enabled`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Generally, in order for the clients to be authenticated by the PKI realm they must connect directly to {{es}}. That is, they must not pass through proxies which terminate the TLS connection. In order to allow for a **trusted** and **smart** proxy, such as Kibana, to sit before {{es}} and terminate TLS connections, but still allow clients to be authenticated on {{es}} by this realm, you need to toggle this to `true`. Defaults to `false`. If delegation is enabled, then either the `truststore.path` or the `certificate_authorities` setting must be defined. For more details, see [Configuring authentication delegation for PKI realms](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/pki.md#pki-realm-for-proxied-clients).
+
+
+#### SAML realm settings [ref-saml-settings]
+
+In addition to the [settings that are valid for all realms](#ref-realm-settings), you can specify the following settings.
+
+`idp.entity_id` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The Entity ID of the SAML Identity Provider. An Entity ID is a URI with a maximum length of 1024 characters. It can be a URL ([https://idp.example.com/](https://idp.example.com/)) or a URN (`urn:example.com:idp`) and can be found in the configuration or the SAML metadata of the Identity Provider.
+
+`idp.metadata.path` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path *(recommended)* or URL to a SAML 2.0 metadata file describing the capabilities and configuration of the Identity Provider. If a path is provided, then it is resolved relative to the {{es}} config directory. If a URL is provided, then it must be either a `file` URL or a `https` URL.
+
+    {{es}} automatically polls this metadata resource and reloads the IdP configuration when changes are detected. File based resources are polled at a frequency determined by the global {{es}} `resource.reload.interval.high` setting, which defaults to 5 seconds. HTTPS resources are polled at a frequency determined by the realm’s `idp.metadata.http.refresh` and `idp.metadata.http.minimum_refresh` settings.
+
+    If the metadata resource is loaded from a file, then the file must exist at node startup; if it does not exist, the node will fail to start.
+    If the resource is loaded over HTTPS then (by default) the node will be tolerant of a failure to load the resource - the node will start and will continue to poll the URL for updates. The affected SAML realm will fail to authenticate users until the problem is resolved. To force the node to fail if the metadata is unavailable, set `idp.metadata.http.fail_on_error` to `true`.
+
+
+`idp.metadata.http.fail_on_error`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) If set to `true`, the realm will fail on startup (and prevent the node from starting) if it attempts to load metadata over HTTPS and that metadata is not available. If set to `false` (the default), the node will start but the affected SAML realm will not support user authentication until the metadata can be successfully loaded. This setting is ignored if metadata is loaded from a file.
+
+`idp.metadata.http.refresh` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the frequency with which `https` metadata is checked for changes. Defaults to `1h` (1 hour).
+
+    Under some circumstances {{es}} may determine that the metadata needs to be checked more frequently. This may occur if previous attempts to load the metadata encountered an error, or if the metadata indicates that it is due to expire in less than the configured refresh interval. In these cases {{es}} will poll more often, but never more frequently than `idp.metadata.http.minimum_refresh`. If there is an attempt to authenticate against a realm that has not yet loaded metadata successfully, that realm may attempt to load metadata outside of the configured polling frequency.
+
+
+`idp.metadata.http.minimum_refresh`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the minimum frequency with which `https` metadata is checked for changes. In regular operation {{es}} will use the value of `idp.metadata.http.refresh` as the polling interval. However, under some circumstances {{es}} may determine that it needs to poll more frequently. In these cases, the `minimum_refresh` will set the minimum frequency at which the metadata will be checked. Defaults to `5m` (5 minutes) and must not be set to a value greater than `idp.metadata.http.refresh`.
+
+`idp.use_single_logout` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Indicates whether to utilise the Identity Provider’s Single Logout service (if one exists in the IdP metadata file). Defaults to `true`.
+
+`sp.entity_id` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The Entity ID to use for this SAML Service Provider. This should be entered as a URI. We recommend that you use the base URL of your Kibana instance. For example, `https://kibana.example.com/`.
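+
+As an illustration of the metadata polling behavior described above, the relevant settings might be combined as follows within a SAML realm definition. The realm name and URL are placeholders.
+
+```yaml
+# Hypothetical fragment of a SAML realm: check HTTPS metadata roughly every 30 minutes,
+# never more often than every 5 minutes, and refuse to start if it cannot be loaded at all.
+xpack.security.authc.realms.saml.saml1:
+  idp.metadata.path: "https://sso.example.com/metadata.xml"
+  idp.metadata.http.refresh: 30m
+  idp.metadata.http.minimum_refresh: 5m
+  idp.metadata.http.fail_on_error: true
+```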
+
+`sp.acs` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The URL of the Assertion Consumer Service within {{kib}}. Typically this is the "api/security/saml/callback" endpoint of your Kibana server. For example, `https://kibana.example.com/api/security/saml/callback`.
+
+`sp.logout` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The URL of the Single Logout service within {{kib}}. Typically this is the "logout" endpoint of your Kibana server. For example, `https://kibana.example.com/logout`.
+
+`attributes.principal` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The Name of the SAML attribute that contains the user’s principal (username).
+
+`attributes.groups` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The Name of the SAML attribute that contains the user’s groups.
+
+`attributes.name` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The Name of the SAML attribute that contains the user’s full name.
+
+`attributes.mail` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The Name of the SAML attribute that contains the user’s email address.
+
+`attributes.dn` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The Name of the SAML attribute that contains the user’s X.500 *Distinguished Name*.
+
+`attribute_patterns.principal` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A Java regular expression that is matched against the SAML attribute specified by `attributes.principal` before it is applied to the user’s *principal* property. The attribute value must match the pattern and the value of the first *capturing group* is used as the principal. For example, `^([^@]+)@example\\.com$` matches email addresses from the "example.com" domain and uses the local-part as the principal.
+
+`attribute_patterns.groups` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) As per `attribute_patterns.principal`, but for the *group* property.
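+
+Putting the IdP, SP, and attribute settings above together, a SAML realm definition in `elasticsearch.yml` might look roughly like the following sketch. The realm name, entity IDs, URLs, and attribute names are illustrative placeholders, not recommended values.
+
+```yaml
+xpack.security.authc.realms.saml.saml1:
+  order: 2
+  idp.metadata.path: saml/idp-metadata.xml   # resolved relative to the config directory
+  idp.entity_id: "https://sso.example.com/"
+  sp.entity_id: "https://kibana.example.com/"
+  sp.acs: "https://kibana.example.com/api/security/saml/callback"
+  sp.logout: "https://kibana.example.com/logout"
+  attributes.principal: "urn:oid:0.9.2342.19200300.100.1.1"   # hypothetical IdP attribute (the "uid" OID)
+  attributes.groups: "groups"                                 # hypothetical IdP attribute name
+```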
+ +`attribute_patterns.name` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) As per `attribute_patterns.principal`, but for the *name* property. + +`attribute_patterns.mail` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) As per `attribute_patterns.principal`, but for the *mail* property. + +`attribute_patterns.dn` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) As per `attribute_patterns.principal`, but for the *dn* property. + +`attribute_delimiters.groups` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A plain string that is used as a delimiter to split a single-valued SAML attribute specified by `attributes.groups` before it is applied to the user’s *groups* property. For example, splitting the SAML attribute value `engineering,elasticsearch-admins,employees` on a delimiter value of `,` will result in `engineering`, `elasticsearch-admins`, and `employees` as the list of groups for the user. The delimiter will always be split on, regardless of escaping in the input string. This setting does not support multi-valued SAML attributes. It cannot be used together with the `attribute_patterns` setting. You can only configure this setting for the groups attribute. + +`nameid_format` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The NameID format that should be requested when asking the IdP to authenticate the current user. The default is to not include the `nameid_format` attribute. + +`nameid.allow_create` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The value of the `AllowCreate` attribute of the `NameIdPolicy` element in an authentication request. The default value is false. + +`nameid.sp_qualifier` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The value of the `SPNameQualifier` attribute of the `NameIdPolicy` element in an authentication request. The default is to not include the `SPNameQualifier` attribute. + +`force_authn` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies whether to set the `ForceAuthn` attribute when requesting that the IdP authenticate the current user. If set to `true`, the IdP is required to verify the user’s identity, irrespective of any existing sessions they might have. Defaults to `false`. 
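+
+For example, a realm that requests a transient NameID, forces re-authentication, and splits a single-valued groups attribute on commas might add the following to the sketch above. The values are illustrative only.
+
+```yaml
+# Hypothetical additions to a SAML realm definition
+# (under xpack.security.authc.realms.saml.<realm-name>).
+nameid_format: "urn:oasis:names:tc:SAML:2.0:nameid-format:transient"
+force_authn: true
+attribute_delimiters.groups: ","   # split "engineering,employees" into two groups
+```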
+ +`populate_user_metadata` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies whether to populate the {{es}} user’s metadata with the values that are provided by the SAML attributes. Defaults to `true`. + +`authorization_realms` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The names of the realms that should be consulted for delegated authorization. If this setting is used, then the SAML realm does not perform role mapping and instead loads the user from the listed realms. See [Delegating authorization to another realm](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/realm-chains.md#authorization_realms). + +`allowed_clock_skew` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum amount of skew that can be tolerated between the IdP’s clock and the {{es}} node’s clock. Defaults to `3m` (3 minutes). + +`req_authn_context_class_ref` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A comma separated list of Authentication Context Class Reference values to be included in the Requested Authentication Context when requesting the IdP to authenticate the current user. The Authentication Context of the corresponding authentication response should contain at least one of the requested values. + + For more information, see [Requesting specific authentication methods](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/saml.md#req-authn-context). + + + +#### SAML realm signing settings [ref-saml-signing-settings] + +If a signing key is configured (that is, either `signing.key` or `signing.keystore.path` is set), then {{es}} signs outgoing SAML messages. Signing can be configured using the following settings: + +`signing.saml_messages` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A list of SAML message types that should be signed or `*` to sign all messages. Each element in the list should be the local name of a SAML XML Element. Supported element types are `AuthnRequest`, `LogoutRequest` and `LogoutResponse`. Only valid if `signing.key` or `signing.keystore.path` is also specified. Defaults to `*`. + +`signing.key` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path to the PEM encoded private key to use for SAML message signing. `signing.key` and `signing.keystore.path` cannot be used at the same time. + +`signing.secure_key_passphrase` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Specifies the passphrase to decrypt the PEM encoded private key (`signing.key`) if it is encrypted. 
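+
+A minimal sketch of the signing settings, assuming a PEM key pair whose file names are placeholders; `signing.certificate` is described just below.
+
+```yaml
+# Fragment of a SAML realm definition: sign only outgoing authentication requests.
+signing.key: saml/saml-signing.key
+signing.certificate: saml/saml-signing.crt
+signing.saml_messages: AuthnRequest
+```
+
+If the key is encrypted, its passphrase would go into the {{es}} keystore as `signing.secure_key_passphrase`.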
+ +`signing.certificate` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path to the PEM encoded certificate (or certificate chain) that corresponds to the `signing.key`. This certificate must also be included in the Service Provider metadata or manually configured within the IdP to allow for signature validation. This setting can only be used if `signing.key` is set. + +`signing.keystore.path` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path to the keystore that contains a private key and certificate. It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `signing.key` at the same time. + +`signing.keystore.type` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The type of the keystore in `signing.keystore.path`. Must be either `jks` or `PKCS12`. If the keystore path ends in ".p12", ".pfx", or "pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. + +`signing.keystore.alias` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the alias of the key within the keystore that should be used for SAML message signing. If the keystore contains more than one private key, this setting must be specified. + +`signing.keystore.secure_password` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password to the keystore in `signing.keystore.path`. + +`signing.keystore.secure_key_password` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore (`signing.keystore.path`). Defaults to the keystore password. + + +#### SAML realm encryption settings [ref-saml-encryption-settings] + +If an encryption key is configured (that is, either `encryption.key` or `encryption.keystore.path` is set), then {{es}} publishes an encryption certificate when generating metadata and attempts to decrypt incoming SAML content. Encryption can be configured using the following settings: + +`encryption.key` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path to the PEM encoded private key to use for SAML message decryption. `encryption.key` and `encryption.keystore.path` cannot be used at the same time. + +`encryption.secure_key_passphrase` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Specifies the passphrase to decrypt the PEM encoded private key (`encryption.key`) if it is encrypted. 
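+
+Similarly, a minimal sketch for decrypting incoming SAML content, with placeholder file names; `encryption.certificate` is described just below.
+
+```yaml
+# Fragment of a SAML realm definition (under xpack.security.authc.realms.saml.<realm-name>).
+encryption.key: saml/saml-encryption.key
+encryption.certificate: saml/saml-encryption.crt
+```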
+ +`encryption.certificate` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path to the PEM encoded certificate (or certificate chain) that is associated with the `encryption.key`. This certificate must also be included in the Service Provider metadata or manually configured within the IdP to enable message encryption. This setting can be used only if `encryption.key` is set. + +`encryption.keystore.path` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path to the keystore that contains a private key and certificate. It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `encryption.key` at the same time. + +`encryption.keystore.type` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The type of the keystore (`encryption.keystore.path`). Must be either `jks` or `PKCS12`. If the keystore path ends in ".p12", ".pfx", or "pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. + +`encryption.keystore.alias` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the alias of the key within the keystore (`encryption.keystore.path`) that should be used for SAML message decryption. If not specified, all compatible key pairs from the keystore are considered as candidate keys for decryption. + +`encryption.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password to the keystore (`encryption.keystore.path`). + +`encryption.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore (`encryption.keystore.path`). Only a single password is supported. If you are using multiple decryption keys, they cannot have individual passwords. + + +#### SAML realm SSL settings [ref-saml-ssl-settings] + +If you are loading the IdP metadata over SSL/TLS (that is, `idp.metadata.path` is a URL using the `https` protocol), the following settings can be used to configure SSL. + +::::{note} +These settings are not used for any purpose other than loading metadata over https. +:::: + + +`ssl.key` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Path to a PEM encoded file containing the private key. + + If HTTP client authentication is required, it uses this file. You cannot use this setting and `ssl.keystore.path` at the same time. + + +`ssl.key_passphrase` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. 
[7.17.0] Prefer `ssl.secure_key_passphrase` instead.
+
+    You cannot use this setting and `ssl.secure_key_passphrase` at the same time.
+
+
+`ssl.secure_key_passphrase`
+: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional.
+
+    You cannot use this setting and `ssl.key_passphrase` at the same time.
+
+
+`ssl.certificate` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path for the PEM encoded certificate (or certificate chain) that is associated with the key.
+
+    This setting can be used only if `ssl.key` is set.
+
+
+`ssl.certificate_authorities` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) List of paths to PEM encoded certificate files that should be trusted.
+
+    This setting and `ssl.truststore.path` cannot be used at the same time.
+
+
+`ssl.keystore.path` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate.
+
+    It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time.
+
+
+`ssl.keystore.type` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the keystore file. It must be either `jks` or `PKCS12`. If the keystore path ends in ".p12", ".pfx", or ".pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`.
+
+`ssl.keystore.password` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the keystore. [7.17.0] Prefer `ssl.keystore.secure_password` instead.
+
+`ssl.keystore.secure_password`
+: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore.
+
+    You cannot use this setting and `ssl.keystore.password` at the same time.
+
+
+`ssl.keystore.key_password`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the key in the keystore. The default is the keystore password. [7.17.0] Prefer `ssl.keystore.secure_key_password` instead.
+
+    You cannot use this setting and `ssl.keystore.secure_password` at the same time.
+
+    You cannot use this setting and `ssl.keystore.secure_key_password` at the same time.
+
+
+`ssl.keystore.secure_key_password`
+: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password.
+
+    You cannot use this setting and `ssl.keystore.key_password` at the same time.
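+
+These `ssl.*` settings only take effect when `idp.metadata.path` is an `https` URL. As an illustration, a realm that trusts a dedicated CA for the metadata host might use the following fragment; the file name and URL are placeholders.
+
+```yaml
+# Fragment of a SAML realm definition (under xpack.security.authc.realms.saml.<realm-name>).
+idp.metadata.path: "https://sso.example.com/metadata.xml"
+ssl.certificate_authorities: ["idp-metadata-ca.crt"]   # hypothetical CA file in the config directory
+```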
+ +`ssl.truststore.path` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`ssl.truststore.type` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the truststore file. It must be either `jks` or `PKCS12`. If the file name ends in ".p12", ".pfx" or "pkcs12", the default is `PKCS12`. Otherwise, it defaults to `jks`. + +`ssl.truststore.password` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the truststore. [7.17.0] Prefer `ssl.truststore.secure_password` instead. + + You cannot use this setting and `ssl.truststore.secure_password` at the same time. + + +`ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + This setting cannot be used with `ssl.truststore.password`. + + +`ssl.verification_mode` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the verification of certificates. + + ::::{dropdown} Valid values + `full` + : Validates that the provided certificate: has an issue date that’s within the `not_before` and `not_after` dates; chains to a trusted Certificate Authority (CA); has a `hostname` or IP address that matches the names within the certificate. + + `certificate` + : Validates the provided certificate and verifies that it’s signed by a trusted authority (CA), but doesn’t check the certificate `hostname`. + + `none` + : Performs no certificate validation. + + ::::{important} + Setting certificate validation to `none` disables many security benefits of SSL/TLS, which is very dangerous. Only set this value if instructed by Elastic Support as a temporary diagnostic mechanism when attempting to resolve TLS errors. + :::: + + + :::: + + + Defaults to `full`. + + +`ssl.supported_protocols` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported protocols with versions. Valid protocols: `SSLv2Hello`, `SSLv3`, `TLSv1`, `TLSv1.1`, `TLSv1.2`, `TLSv1.3`. If the JVM’s SSL provider supports TLSv1.3, the default is `TLSv1.3,TLSv1.2,TLSv1.1`. Otherwise, the default is `TLSv1.2,TLSv1.1`. + + {{es}} relies on your JDK’s implementation of SSL and TLS. View [Supported SSL/TLS versions by JDK version](docs-content://deploy-manage/security/supported-ssltls-versions-by-jdk-version.md) for more information. + + ::::{note} + If `xpack.security.fips_mode.enabled` is `true`, you cannot use `SSLv2Hello` or `SSLv3`. See [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md). 
+ :::: + + +`ssl.cipher_suites` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported cipher suites vary depending on which version of Java you use. For example, for version 12 the default value is `TLS_AES_256_GCM_SHA384`, `TLS_AES_128_GCM_SHA256`, `TLS_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA`, `TLS_RSA_WITH_AES_256_GCM_SHA384`, `TLS_RSA_WITH_AES_128_GCM_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA256`, `TLS_RSA_WITH_AES_128_CBC_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA`, `TLS_RSA_WITH_AES_128_CBC_SHA`. + + For more information, see Oracle’s [Java Cryptography Architecture documentation](https://docs.oracle.com/en/java/javase/11/security/oracle-providers.md#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2). + + + +#### Kerberos realm settings [ref-kerberos-settings] + +In addition to the [settings that are valid for all realms](#ref-realm-settings), you can specify the following settings: + +`keytab.path` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path to the Kerberos keytab file that contains the service principal used by this {{es}} node. This must be a location within the {{es}} configuration directory and the file must have read permissions. Required. + +`remove_realm_name` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set to `true` to remove the realm part of principal names. Principal names in Kerberos have the form `user/instance@REALM`. If this option is `true`, the realm part (`@REALM`) will not be included in the username. Defaults to `false`. + +`krb.debug` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set to `true` to enable debug logs for the Java login module that provides support for Kerberos authentication. Defaults to `false`. + +`cache.ttl` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The time-to-live for cached user entries. A user is cached for this period of time. Specify the time period using the standard {{es}} [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units). Defaults to `20m`. 
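+
+A minimal sketch of a Kerberos realm using the settings above; the realm name and keytab file name are placeholders.
+
+```yaml
+xpack.security.authc.realms.kerberos.kerb1:
+  order: 3
+  keytab.path: es.keytab      # must be inside the Elasticsearch config directory and readable
+  remove_realm_name: false
+  cache.ttl: 20m
+```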
+ +`cache.max_users` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum number of user entries that can live in the cache at any given time. Defaults to 100,000. + +`authorization_realms` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The names of the realms that should be consulted for delegated authorization. If this setting is used, then the Kerberos realm does not perform role mapping and instead loads the user from the listed realms. See [Delegating authorization to another realm](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/realm-chains.md#authorization_realms). + + +#### OpenID Connect realm settings [ref-oidc-settings] + +In addition to the [settings that are valid for all realms](#ref-realm-settings), you can specify the following settings. + +`op.issuer` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A verifiable Identifier for your OpenID Connect Provider. An Issuer Identifier is usually a case sensitive URL using the https scheme that contains scheme, host, and optionally, port number and path components and no query or fragment components. The value for this setting should be provided by your OpenID Connect Provider. + +`op.authorization_endpoint` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The URL for the Authorization Endpoint at the OpenID Connect Provider. The value for this setting should be provided by your OpenID Connect Provider. + +`op.token_endpoint` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The URL for the Token Endpoint at the OpenID Connect Provider. The value for this setting should be provided by your OpenID Connect Provider. + +`op.userinfo_endpoint` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The URL for the User Info Endpoint at the OpenID Connect Provider. The value for this setting should be provided by your OpenID Connect Provider. + +`op.endsession_endpoint` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The URL for the End Session Endpoint at the OpenID Connect Provider. The value for this setting should be provided by your OpenID Connect Provider. 
+
+`op.jwkset_path` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The file name or URL to a JSON Web Key Set (JWKS) with the public key material used to verify tokens and claims responses signed by the OpenID Connect Provider. A value is considered a file name if it does not begin with `https` or `http`. The file name is resolved relative to the {{es}} configuration directory. Changes to the file are polled at a frequency determined by the global {{es}} `resource.reload.interval.high` setting, which defaults to 5 seconds.
+
+    If a URL is provided, then it must begin with `https://` or `http://`. {{es}} automatically caches the retrieved JWK and will attempt to refresh the JWK upon signature verification failure, as this might indicate that the OpenID Connect Provider has rotated the signing keys.
+
+`authorization_realms`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The names of the realms that should be consulted for delegated authorization. If this setting is used, then the OpenID Connect realm does not perform role mapping and instead loads the user from the listed realms. See [Delegating authorization to another realm](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/realm-chains.md#authorization_realms).
+
+`rp.client_id` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The OAuth 2.0 Client Identifier that was assigned to {{es}} during registration at the OpenID Connect Provider.
+
+`rp.client_secret`
+: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The OAuth 2.0 Client Secret that was assigned to {{es}} during registration at the OpenID Connect Provider.
+
+`rp.client_auth_method` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The client authentication method used by {{es}} to authenticate to the OpenID Connect Provider. Can be `client_secret_basic`, `client_secret_post`, or `client_secret_jwt`. Defaults to `client_secret_basic`.
+
+`rp.client_auth_jwt_signature_algorithm` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The signature algorithm that {{es}} uses to sign the JWT with which it authenticates as a client to the OpenID Connect Provider when `client_secret_jwt` is selected for `rp.client_auth_method`. Can be either `HS256`, `HS384`, or `HS512`. Defaults to `HS384`.
+
+`rp.redirect_uri` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The Redirect URI within {{kib}}. If you want to use the authorization code flow, this is the `api/security/oidc/callback` endpoint of your {{kib}} server. If you want to use the implicit flow, it is the `api/security/oidc/implicit` endpoint. For example, `https://kibana.example.com/api/security/oidc/callback`.
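+
+Bringing the `op.*` and `rp.*` settings together, an OpenID Connect realm might look roughly like the following sketch. The realm name, endpoints, and client ID are placeholders, and `rp.response_type` and `claims.principal` are described below.
+
+```yaml
+xpack.security.authc.realms.oidc.oidc1:
+  order: 2
+  op.issuer: "https://op.example.org"
+  op.authorization_endpoint: "https://op.example.org/oauth2/v1/authorize"
+  op.token_endpoint: "https://op.example.org/oauth2/v1/token"
+  op.jwkset_path: oidc/jwkset.json          # resolved relative to the config directory
+  rp.client_id: "elasticsearch-rp"          # placeholder client ID
+  rp.response_type: code                    # authorization code flow
+  rp.redirect_uri: "https://kibana.example.com/api/security/oidc/callback"
+  claims.principal: sub
+```
+
+The matching `rp.client_secret` is a secure setting and belongs in the {{es}} keystore rather than in `elasticsearch.yml`.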
+ +`rp.response_type` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) OAuth 2.0 Response Type value that determines the authorization processing flow to be used. Can be `code` for authorization code grant flow, or one of `id_token`, `id_token token` for the implicit flow. + +`rp.signature_algorithm` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The signature algorithm that will be used by {{es}} in order to verify the signature of the id tokens it will receive from the OpenID Connect Provider. Allowed values are `HS256`, `HS384`, `HS512`, `ES256`, `ES384`, `ES512`, `RS256`, `RS384`, `RS512`, `PS256`, `PS384`, `PS512`. Defaults to `RS256`. + +`rp.requested_scopes` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The scope values that will be requested by the OpenID Connect Provider as part of the Authentication Request. Optional, defaults to `openid` + +`rp.post_logout_redirect_uri` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The Redirect URI (usually within {{kib}}) that the OpenID Connect Provider should redirect the browser to after a successful Single Logout. + +`claims.principal` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The name of the OpenID Connect claim that contains the user’s principal (username). + +`claims.groups` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The name of the OpenID Connect claim that contains the user’s groups. + +`claims.name` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The name of the OpenID Connect claim that contains the user’s full name. + +`claims.mail` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The name of the OpenID Connect claim that contains the user’s email address. + +`claims.dn` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The name of the OpenID Connect claim that contains the user’s X.509 *Distinguished Name*. 
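+
+For example, to derive the username from the local part of an email claim, the `claims.*` settings above can be combined with the `claim_patterns.*` settings described next. The claim names and domain are illustrative.
+
+```yaml
+# Fragment of an OpenID Connect realm definition (under xpack.security.authc.realms.oidc.<realm-name>).
+claims.principal: email
+claim_patterns.principal: '^([^@]+)@example\.com$'   # first capturing group becomes the principal
+claims.groups: groups
+```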
+ +`claim_patterns.principal` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A Java regular expression that is matched against the OpenID Connect claim specified by `claims.principal` before it is applied to the user’s *principal* property. The attribute value must match the pattern and the value of the first *capturing group* is used as the principal. For example, `^([^@]+)@example\\.com$` matches email addresses from the "example.com" domain and uses the local-part as the principal. + +`claim_patterns.groups` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) As per `claim_patterns.principal`, but for the *group* property. + +`claim_patterns.name` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) As per `claim_patterns.principal`, but for the *name* property. + +`claim_patterns.mail` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) As per `claim_patterns.principal`, but for the *mail* property. + +`claim_patterns.dn` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) As per `claim_patterns.principal`, but for the *dn* property. + +`allowed_clock_skew` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum allowed clock skew to be taken into consideration when validating id tokens with regards to their creation and expiration times. Defaults to `60s`. + +`populate_user_metadata` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies whether to populate the {{es}} user’s metadata with the values that are provided by the OpenID Connect claims. Defaults to `true`. + +`http.proxy.host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the address of the proxy server that will be used by the internal http client for all back-channel communication to the OpenID Connect Provider endpoints. This includes requests to the Token Endpoint, the Userinfo Endpoint and requests to fetch the JSON Web Key Set from the OP if `op.jwkset_path` is set as a URL. + +`http.proxy.scheme` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the protocol to use to connect to the proxy server that will be used by the http client for all back-channel communication to the OpenID Connect Provider endpoints. Defaults to `http`. Allowed values are `http` or `https`. 
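+
+If the back-channel traffic has to traverse a proxy, the three `http.proxy.*` settings (including `http.proxy.port`, described next) might be combined as follows; the host and port are placeholders.
+
+```yaml
+# Fragment of an OpenID Connect realm definition (under xpack.security.authc.realms.oidc.<realm-name>).
+http.proxy.host: proxy.example.org
+http.proxy.scheme: http
+http.proxy.port: 8080
+```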
+
+`http.proxy.port`
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the port of the proxy server that will be used by the http client for all back-channel communication to the OpenID Connect Provider endpoints. Defaults to `80`.
+
+`http.connect_timeout` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the behavior of the http client used for back-channel communication to the OpenID Connect Provider endpoints. Specifies the timeout until a connection is established. A value of zero means the timeout is not used. Defaults to `5s`.
+
+`http.connection_read_timeout` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the behavior of the http client used for back-channel communication to the OpenID Connect Provider endpoints. Specifies the timeout used when requesting a connection from the connection manager. Defaults to `5s`.
+
+`http.socket_timeout` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the behavior of the http client used for back-channel communication to the OpenID Connect Provider endpoints. Specifies the socket timeout (SO_TIMEOUT) in milliseconds, which is the timeout for waiting for data or, put differently, the maximum period of inactivity between two consecutive data packets. Defaults to `5s`.
+
+`http.max_connections` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the behavior of the http client used for back-channel communication to the OpenID Connect Provider endpoints. Specifies the maximum number of connections allowed across all endpoints. Defaults to `200`.
+
+`http.max_endpoint_connections` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the behavior of the http client used for back-channel communication to the OpenID Connect Provider endpoints. Specifies the maximum number of connections allowed per endpoint. Defaults to `200`.
+
+`http.tcp.keep_alive` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Whether to enable TCP keepalives on HTTP connections used for back-channel communication to the OpenID Connect Provider endpoints. Defaults to `true`.
+
+`http.connection_pool_ttl` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the behavior of the http client used for back-channel communication to the OpenID Connect Provider endpoints. Specifies the time-to-live of connections in the connection pool (defaults to 3 minutes). A connection is closed if it is idle for more than the specified timeout.
+
+    The server can also set the `Keep-Alive` HTTP response header. The effective time-to-live value is the smaller value between this setting and the `Keep-Alive` response header. Configure this setting to `-1` to let the server dictate the value. If the header is not set by the server and the setting has a value of `-1`, the time-to-live is infinite and connections never expire.
+
+
+#### OpenID Connect realm SSL settings [ref-oidc-ssl-settings]
+
+The following settings can be used to configure SSL for all outgoing http connections to the OpenID Connect Provider endpoints.
+
+::::{note}
+These settings are *only* used for the back-channel communication between {{es}} and the OpenID Connect Provider.
+::::
+
+
+`ssl.key` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Path to a PEM encoded file containing the private key.
+
+    If HTTP client authentication is required, it uses this file. You cannot use this setting and `ssl.keystore.path` at the same time.
+
+
+`ssl.key_passphrase` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. [7.17.0] Prefer `ssl.secure_key_passphrase` instead.
+
+    You cannot use this setting and `ssl.secure_key_passphrase` at the same time.
+
+
+`ssl.secure_key_passphrase`
+: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional.
+
+    You cannot use this setting and `ssl.key_passphrase` at the same time.
+
+
+`ssl.certificate` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path for the PEM encoded certificate (or certificate chain) that is associated with the key.
+
+    This setting can be used only if `ssl.key` is set.
+
+
+`ssl.certificate_authorities` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) List of paths to PEM encoded certificate files that should be trusted.
+
+    This setting and `ssl.truststore.path` cannot be used at the same time.
+
+
+`ssl.keystore.path` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate.
+
+    It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time.
+
+
+`ssl.keystore.type` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}")
+: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the keystore file. It must be either `jks` or `PKCS12`.
If the keystore path ends in ".p12", ".pfx", or ".pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. + +`ssl.keystore.password` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the keystore. [7.17.0] Prefer `ssl.keystore.secure_password` instead. + +`ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + + You cannot use this setting and `ssl.keystore.password` at the same time. + + +`ssl.keystore.key_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the key in the keystore. The default is the keystore password. [7.17.0] Prefer `ssl.keystore.secure_key_password` instead. + + You cannot use this setting and `ssl.keystore.secure_password` at the same time. + + You cannot use this setting and `ssl.keystore.secure_key_password` at the same time. + + +`ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. + + You cannot use this setting and `ssl.keystore.key_password` at the same time. + + +`ssl.truststore.path` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`ssl.truststore.type` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the truststore file. It must be either `jks` or `PKCS12`. If the file name ends in ".p12", ".pfx", or ".pkcs12", the default is `PKCS12`. Otherwise, it defaults to `jks`. + +`ssl.truststore.password` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the truststore. [7.17.0] Prefer `ssl.truststore.secure_password` instead. + + You cannot use this setting and `ssl.truststore.secure_password` at the same time. + + +`ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + You cannot use this setting and `ssl.truststore.password` at the same time. + + +`ssl.verification_mode` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the verification of certificates. + + ::::{dropdown} Valid values + `full` + : Validates that the provided certificate: has an issue date that’s within the `not_before` and `not_after` dates; chains to a trusted Certificate Authority (CA); has a `hostname` or IP address that matches the names within the certificate.
+ + `certificate` + : Validates the provided certificate and verifies that it’s signed by a trusted authority (CA), but doesn’t check the certificate `hostname`. + + `none` + : Performs no certificate validation. + + ::::{important} + Setting certificate validation to `none` disables many security benefits of SSL/TLS, which is very dangerous. Only set this value if instructed by Elastic Support as a temporary diagnostic mechanism when attempting to resolve TLS errors. + :::: + + + :::: + + + Defaults to `full`. + + +`ssl.supported_protocols` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported protocols with versions. Valid protocols: `SSLv2Hello`, `SSLv3`, `TLSv1`, `TLSv1.1`, `TLSv1.2`, `TLSv1.3`. If the JVM’s SSL provider supports TLSv1.3, the default is `TLSv1.3,TLSv1.2,TLSv1.1`. Otherwise, the default is `TLSv1.2,TLSv1.1`. + + {{es}} relies on your JDK’s implementation of SSL and TLS. View [Supported SSL/TLS versions by JDK version](docs-content://deploy-manage/security/supported-ssltls-versions-by-jdk-version.md) for more information. + + ::::{note} + If `xpack.security.fips_mode.enabled` is `true`, you cannot use `SSLv2Hello` or `SSLv3`. See [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md). + :::: + + +`ssl.cipher_suites` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported cipher suites vary depending on which version of Java you use. For example, for version 12 the default value is `TLS_AES_256_GCM_SHA384`, `TLS_AES_128_GCM_SHA256`, `TLS_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA`, `TLS_RSA_WITH_AES_256_GCM_SHA384`, `TLS_RSA_WITH_AES_128_GCM_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA256`, `TLS_RSA_WITH_AES_128_CBC_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA`, `TLS_RSA_WITH_AES_128_CBC_SHA`. + + For more information, see Oracle’s [Java Cryptography Architecture documentation](https://docs.oracle.com/en/java/javase/11/security/oracle-providers.md#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2). + + + +#### JWT realm settings [ref-jwt-settings] + +In addition to the [settings that are valid for all realms](#ref-realm-settings), you can specify the following settings. + +`token_type` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The token type, `id_token` or `access_token`, that the JWT realm uses to verify incoming JWTs. Defaults to `id_token`. 
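+
+For example, a minimal JWT realm entry in `elasticsearch.yml` might look like the following sketch; the realm name `jwt1`, the issuer, and the audience are illustrative values, not defaults:
+
+```yaml
+xpack.security.authc.realms.jwt.jwt1:
+  order: 3
+  token_type: id_token
+  allowed_issuer: "https://op.example.com"
+  allowed_audiences: ["elasticsearch"]
+  claims.principal: sub
+```
+
+The remaining settings described in this section are added under the same `xpack.security.authc.realms.jwt.<realm-name>` prefix.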
+ +`allowed_audiences` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A list of allowed JWT audiences that {{es}} should verify. {{es}} will only consume JWTs that were intended for any of these audiences, as denoted by the `aud` claim in the JWT. The audiences are compared with exact string matches and do not support wildcards or regex. Examples of `aud` claim are `https://example.com/client1` and `other_service,elasticsearch`. When `token_type` is `access_token`, the audiences can be optionally denoted by a different claim in the JWT if `aud` does not exist. See also [`fallback_claims.aud`](#security-settings-jwt-fallback-claims-aud). + +`allowed_clock_skew` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum allowed clock skew to be taken into consideration when validating JWTs with regard to their creation, not before, and expiration times. + +`allowed_issuer` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A verifiable Identifier for your JWT Issuer. An Issuer Identifier is usually a case-sensitive URL using the https scheme that contains scheme, host, and optionally, port number and path components and no query or fragment components. However, it can be any string. The value for this setting should be provided by your JWT Issuer. The issuer is compared with exact string matches and does not support wildcards or regex. Examples of `iss` claim are `https://example.com:8443/jwt` and `issuer123`. + +`allowed_subjects` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A list of allowed JWT subjects that {{es}} should verify. {{es}} will only consume JWTs that were issued for any of these subjects, as denoted by the `sub` claim in the JWT. The subjects are compared with exact string matches and do not support wildcards or regex. Examples of `sub` claim are `https://example.com/user1` and `user_1,user2`. When `token_type` is `access_token`, this setting is mandatory and the subject can be optionally denoted by a different claim in the JWT if `sub` does not exist. See also [`fallback_claims.sub`](#security-settings-jwt-fallback-claims-sub). + +$$$security-settings-jwt-fallback-claims-sub$$$ + +`fallback_claims.sub` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The alternative claim to look for the subject information if the `sub` claim does not exist. It is configurable only when the `token_type` is `access_token`. The fallback is applied everywhere the `sub` claim is used.
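+
+As an illustration (not a complete realm definition), an access-token realm whose issuer carries the subject and audience in non-standard claims might combine the fallback settings as follows; the claim names `client_id` and `scope` are placeholders for whatever your token issuer actually emits:
+
+```yaml
+xpack.security.authc.realms.jwt.jwt2:
+  token_type: access_token
+  fallback_claims.sub: client_id
+  fallback_claims.aud: scope
+```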
+ +$$$security-settings-jwt-fallback-claims-aud$$$ + +`fallback_claims.aud` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The alternative claim to look for the audiences information if the `aud` claim does not exist. It is configurable only when the `token_type` is `access_token`. The fallback is applied everywhere the `aud` claim is used. + +`required_claims` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Additional claims and associated values that {{es}} should verify. This is a group setting that takes key/value pairs, where the key is a string and the value must be either a string or an array of strings. The values are compared with exact string matches and do not support wildcards or regex. + +For example: + +```yaml +xpack.security.authc.realms.jwt.jwt1: + required_claims: + token_use: "id" + versions: ["1.0", "2.0"] +``` + +`allowed_signature_algorithms` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) A list of signature algorithms that will be used by {{es}} in order to verify the signature of the JWT it will receive from the JWT Issuer. Defaults to `RS256`. Examples are `HS512,RS512,ES512` and `ES384`. Allowed values are `HS256`, `HS384`, `HS512`, `ES256`, `ES384`, `ES512`, `RS256`, `RS384`, `RS512`, `PS256`, `PS384`, `PS512`. + +`authorization_realms` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The names of the realms that should be consulted for delegated authorization. If this setting is used, then the JWT realm does not perform role mapping and instead loads the user from the listed realms. See [Delegating authorization to another realm](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/realm-chains.md#authorization_realms). + +`claims.dn` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The name of the JWT claim that contains the user’s Distinguished Name (DN), which uniquely identifies a user or group. + +`claim_patterns.dn` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Accepts the same Java regular expression as [`claim_patterns.principal`](#jwt-claim-pattern-principal), but for the `dn` property. + +`claims.groups` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The name of the JWT claim that contains the user’s groups, such as `groups` and `roles`. 
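+
+For example, a realm that derives the principal from an `email` claim (keeping only the local part) and reads groups from a `roles` claim might use a fragment like this; the claim names are examples, not defaults:
+
+```yaml
+xpack.security.authc.realms.jwt.jwt1:
+  claims.principal: email
+  claim_patterns.principal: "^([^@]+)@example\\.com$"
+  claims.groups: roles
+```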
+ +`claim_patterns.group` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Accepts the same Java regular expression as [`claim_patterns.principal`](#jwt-claim-pattern-principal), but for the `group` property. + +`claims.mail` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The name of the JWT claim that contains the user’s e-mail address. + +`claim_patterns.mail` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Accepts the same Java regular expression as [`claim_patterns.principal`](#jwt-claim-pattern-principal), but for the `mail` property. + +`claims.name` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The name of the JWT claim that contains the user’s username. + +`claim_patterns.name` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Accepts the same Java regular expression as [`claim_patterns.principal`](#jwt-claim-pattern-principal), but for the `name` property. + +`claims.principal` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The name of the JWT claim that contains the user’s principal (username), such as `sub`, `name`, `email`, and `dn`. + +$$$jwt-claim-pattern-principal$$$ + +`claim_patterns.principal` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) An optional Java regular expression that is matched against the JWT claim specified by `claims.principal` before it is applied to the user’s `principal` property. The value must match the pattern and the value of the first *capturing group* is used as the principal. For example, `^([^@]+)@example\\.com$` matches email addresses from the `example.com` domain and uses the local-part as the principal. Another example is `sub` which may not need a pattern setting. + +`client_authentication.type` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies whether to use `shared_secret` or `none` to authenticate incoming client requests. If this value is `shared_secret`, the client is authenticated using an HTTP request header that must match a pre-configured secret value. The client must provide this shared secret with every request in the `ES-Client-Authentication` header. If this value is `none`, then the request header `ES-Client-Authentication` is ignored. Defaults to `shared_secret`. + + Enabling client authentication is recommended. 
If JWT bearer tokens are shared with other clients or services, client authentication restricts which ones are allowed to submit those JWTs to {{es}}. + + +`client_authentication.shared_secret` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Secure](docs-content://deploy-manage/security/secure-settings.md), [reloadable](docs-content://deploy-manage/security/secure-settings.md#reloadable-secure-settings)) Secret value string for client authentication. Required if `client_authentication.type` is `shared_secret`. + +`client_authentication.rotation_grace_period` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets the grace period during which the previous `client_authentication.shared_secret` value remains valid after it has been rotated. `client_authentication.shared_secret` can be rotated by updating the keystore and then calling the [reload API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-nodes-reload-secure-settings). Defaults to `1m`. + +`http.connect_timeout` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Sets the timeout for the HTTP client that is used for fetching the JSON Web Key Set from a remote URL. A value of zero means the timeout is not used. Defaults to `5s`. + +`http.connection_read_timeout` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the HTTP timeout used when requesting a connection from the connection manager. Defaults to `5s`. + +`http.socket_timeout` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the maximum socket timeout (SO_TIMEOUT) for the HTTP client to wait for inactivity between two consecutive data packets. Defaults to `5s`. + +`http.max_connections` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the maximum number of connections allowed across all endpoints. + +`http.max_endpoint_connections` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the maximum number of connections allowed per endpoint. + +`jwt.cache.size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the maximum number of JWT cache entries. If clients use a different JWT for every request, set to `0` to disable the JWT cache. Defaults to `100000`. + +`jwt.cache.ttl` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the time-to-live for cached JWT entries. JWTs can only be cached if client authentication is successful (or disabled). Uses the standard {{es}} [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units).
If clients use a different JWT for every request, set to `0` to disable the JWT cache. Defaults to `20m`. + +`pkc_jwkset_path` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The file name or URL to a JSON Web Key Set (JWKS) with the public key material that the JWT Realm uses for verifying token signatures. A value is considered a file name if it does not begin with `https`. The file name is resolved relative to the {{es}} configuration directory. If a URL is provided, then it must begin with `https://` (`http://` is not supported). {{es}} automatically caches the JWK set and will attempt to refresh the JWK set upon signature verification failure, as this might indicate that the JWT Provider has rotated the signing keys. + +`hmac_jwkset` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Contents of a JSON Web Key Set (JWKS), including the secret key that the JWT realm uses to verify token signatures. This format supports multiple keys and optional attributes, and is preferred over the `hmac_key` setting. Cannot be used in conjunction with the `hmac_key` setting. Refer to [Configure {{es}} to use a JWT realm](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/jwt.md). + +`hmac_key` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Contents of a single JSON Web Key (JWK), including the secret key that the JWT realm uses to verify token signatures. This format only supports a single key without attributes, and cannot be used with the `hmac_jwkset` setting. This format is compatible with OIDC. The HMAC key must be a UNICODE string, where the key bytes are the UTF-8 encoding of the UNICODE string. The `hmac_jwkset` setting is preferred. Refer to [Configure {{es}} to use a JWT realm](docs-content://deploy-manage/users-roles/cluster-or-deployment-auth/jwt.md). + +`populate_user_metadata` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies whether to populate the {{es}} user’s metadata with the values that are provided by the JWT claims. Defaults to `true`. + + +#### JWT realm SSL settings [ref-jwt-ssl-settings] + +The following settings can be used to configure SSL for fetching the JSON Web Key Set from a remote URL. + +::::{note} +These settings are *only* used for the back-channel communication between {{es}} and the JWT Issuer. +:::: + + +`ssl.key` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Path to a PEM encoded file containing the private key. + + If HTTP client authentication is required, it uses this file. You cannot use this setting and `ssl.keystore.path` at the same time. 
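+
+For instance, if `pkc_jwkset_path` points to an `https://` URL whose certificate is issued by a private CA, trust for that back-channel connection might be pinned with the realm’s `ssl.certificate_authorities` setting (described below); the URL and file name here are placeholders:
+
+```yaml
+xpack.security.authc.realms.jwt.jwt1:
+  pkc_jwkset_path: "https://op.example.com/jwks.json"
+  ssl.certificate_authorities: ["jwt-issuer-ca.crt"]
+```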
+ + +`ssl.key_passphrase` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. [7.17.0] Prefer `ssl.secure_key_passphrase` instead. + + You cannot use this setting and `ssl.secure_key_passphrase` at the same time. + + +`ssl.secure_key_passphrase` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. + + You cannot use this setting and `ssl.key_passphrase` at the same time. + + +`ssl.certificate` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path for the PEM encoded certificate (or certificate chain) that is associated with the key. + + This setting can be used only if `ssl.key` is set. + + +`ssl.certificate_authorities` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) List of paths to PEM encoded certificate files that should be trusted. + + This setting and `ssl.truststore.path` cannot be used at the same time. + + +`ssl.keystore.path` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`ssl.keystore.type` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the keystore file. It must be either `jks` or `PKCS12`. If the keystore path ends in ".p12", ".pfx", or ".pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. + +`ssl.keystore.password` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the keystore. [7.17.0] Prefer `ssl.keystore.secure_password` instead. + +`ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + + You cannot use this setting and `ssl.keystore.password` at the same time. + + +`ssl.keystore.key_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the key in the keystore. The default is the keystore password. [7.17.0] Prefer `ssl.keystore.secure_key_password` instead. + + You cannot use this setting and `ssl.keystore.secure_password` at the same time. + + You cannot use this setting and `ssl.keystore.secure_key_password` at the same time. 
+ + +`ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. + + You cannot use this setting and `ssl.keystore.key_password` at the same time. + + +`ssl.truststore.path` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`ssl.truststore.type` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the truststore file. It must be either `jks` or `PKCS12`. If the file name ends in ".p12", ".pfx", or ".pkcs12", the default is `PKCS12`. Otherwise, it defaults to `jks`. + +`ssl.truststore.password` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the truststore. [7.17.0] Prefer `ssl.truststore.secure_password` instead. + + You cannot use this setting and `ssl.truststore.secure_password` at the same time. + + +`ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + You cannot use this setting and `ssl.truststore.password` at the same time. + + +`ssl.verification_mode` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the verification of certificates. + + ::::{dropdown} Valid values + `full` + : Validates that the provided certificate: has an issue date that’s within the `not_before` and `not_after` dates; chains to a trusted Certificate Authority (CA); has a `hostname` or IP address that matches the names within the certificate. + + `certificate` + : Validates the provided certificate and verifies that it’s signed by a trusted authority (CA), but doesn’t check the certificate `hostname`. + + `none` + : Performs no certificate validation. + + ::::{important} + Setting certificate validation to `none` disables many security benefits of SSL/TLS, which is very dangerous. Only set this value if instructed by Elastic Support as a temporary diagnostic mechanism when attempting to resolve TLS errors. + :::: + + + :::: + + + Defaults to `full`. + + +`ssl.supported_protocols` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported protocols with versions. Valid protocols: `SSLv2Hello`, `SSLv3`, `TLSv1`, `TLSv1.1`, `TLSv1.2`, `TLSv1.3`. If the JVM’s SSL provider supports TLSv1.3, the default is `TLSv1.3,TLSv1.2,TLSv1.1`. Otherwise, the default is `TLSv1.2,TLSv1.1`. + + {{es}} relies on your JDK’s implementation of SSL and TLS.
View [Supported SSL/TLS versions by JDK version](docs-content://deploy-manage/security/supported-ssltls-versions-by-jdk-version.md) for more information. + + ::::{note} + If `xpack.security.fips_mode.enabled` is `true`, you cannot use `SSLv2Hello` or `SSLv3`. See [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md). + :::: + + +`ssl.cipher_suites` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported cipher suites vary depending on which version of Java you use. For example, for version 12 the default value is `TLS_AES_256_GCM_SHA384`, `TLS_AES_128_GCM_SHA256`, `TLS_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA`, `TLS_RSA_WITH_AES_256_GCM_SHA384`, `TLS_RSA_WITH_AES_128_GCM_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA256`, `TLS_RSA_WITH_AES_128_CBC_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA`, `TLS_RSA_WITH_AES_128_CBC_SHA`. + + For more information, see Oracle’s [Java Cryptography Architecture documentation](https://docs.oracle.com/en/java/javase/11/security/oracle-providers.md#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2). + + + +#### Load balancing and failover [load-balancing] + +The [static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting) `load_balance.type` setting can have the following values: + +* `failover`: The URLs specified are used in the order that they are specified. The first server that can be connected to will be used for all subsequent connections. If a connection to that server fails then the next server that a connection can be established to will be used for subsequent connections. +* `dns_failover`: In this mode of operation, only a single URL may be specified. This URL must contain a DNS name. The system will be queried for all IP addresses that correspond to this DNS name. Connections to the Active Directory or LDAP server will always be tried in the order in which they were retrieved. This differs from `failover` in that there is no reordering of the list and if a server has failed at the beginning of the list, it will still be tried for each subsequent connection. +* `round_robin`: Connections will continuously iterate through the list of provided URLs. If a server is unavailable, iterating through the list of URLs will continue until a successful connection is made. +* `dns_round_robin`: In this mode of operation, only a single URL may be specified. This URL must contain a DNS name. The system will be queried for all IP addresses that correspond to this DNS name. Connections will continuously iterate through the list of addresses. If a server is unavailable, iterating through the list of URLs will continue until a successful connection is made. 
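+
+For example, an LDAP realm (these settings apply to the LDAP and Active Directory realms) that rotates across two directory servers might be configured as in this sketch; the host names are placeholders:
+
+```yaml
+xpack.security.authc.realms.ldap.ldap1:
+  order: 2
+  url: ["ldaps://ldap1.example.com:636", "ldaps://ldap2.example.com:636"]
+  load_balance.type: round_robin
+```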
+ + +### General TLS settings [ssl-tls-settings] + +`xpack.security.ssl.diagnose.trust` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls whether to output diagnostic messages for SSL/TLS trust failures. If this is `true` (the default), a message will be printed to the Elasticsearch log whenever an SSL connection (incoming or outgoing) is rejected due to a failure to establish trust. This diagnostic message contains information that can be used to determine the cause of the failure and assist with resolving the problem. Set to `false` to disable these messages. + + +#### TLS/SSL key and trusted certificate settings [tls-ssl-key-settings] + +The following settings are used to specify a private key, certificate, and the trusted certificates that should be used when communicating over an SSL/TLS connection. If no trusted certificates are configured, the default certificates that are trusted by the JVM will be trusted along with the certificate(s) associated with a key in the same context. The key and certificate must be in place for connections that require client authentication or when acting as an SSL-enabled server. + +::::{note} +:name: pkcs12-truststore-note + +Storing trusted certificates in a PKCS#12 file, although supported, is uncommon in practice. The [`elasticsearch-certutil` tool](/reference/elasticsearch/command-line-tools/certutil.md), as well as Java’s `keytool`, are designed to generate PKCS#12 files that can be used both as a keystore and as a truststore, but this may not be the case for container files that are created using other tools. Usually, PKCS#12 files only contain secret and private entries. To confirm that a PKCS#12 container includes trusted certificate ("anchor") entries, look for `2.16.840.1.113894.746875.1.1: ` in the `openssl pkcs12 -info` output, or `trustedCertEntry` in the `keytool -list` output. +:::: + + +## HTTP TLS/SSL settings [http-tls-ssl-settings] + +You can configure the following TLS/SSL settings. + +`xpack.security.http.ssl.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Used to enable or disable TLS/SSL on the HTTP networking layer, which {{es}} uses to communicate with other clients. The default is `false`. + +`xpack.security.http.ssl.supported_protocols` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported protocols with versions. Valid protocols: `SSLv2Hello`, `SSLv3`, `TLSv1`, `TLSv1.1`, `TLSv1.2`, `TLSv1.3`. If the JVM’s SSL provider supports TLSv1.3, the default is `TLSv1.3,TLSv1.2,TLSv1.1`. Otherwise, the default is `TLSv1.2,TLSv1.1`. + + {{es}} relies on your JDK’s implementation of SSL and TLS. View [Supported SSL/TLS versions by JDK version](docs-content://deploy-manage/security/supported-ssltls-versions-by-jdk-version.md) for more information. + + ::::{note} + If `xpack.security.fips_mode.enabled` is `true`, you cannot use `SSLv2Hello` or `SSLv3`. See [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md). + :::: + + +`xpack.security.http.ssl.client_authentication` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the server’s behavior in regard to requesting a certificate from client connections. Valid values are `required`, `optional`, and `none`.
`required` forces a client to present a certificate, while `optional` requests a client certificate but the client is not required to present one. Defaults to `none`. + +`xpack.security.http.ssl.verification_mode` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The SSL settings in `xpack.security.http.ssl` control a *server context* for TLS, which defines the settings for the TLS connection. The use of `verification_mode` in a TLS *server* is discouraged. Defines how to verify the certificates presented by another party in the TLS connection: + + ::::{dropdown} Valid values + `full` + : Validates that the provided certificate: has an issue date that’s within the `not_before` and `not_after` dates; chains to a trusted Certificate Authority (CA); has a `hostname` or IP address that matches the names within the certificate. + + `certificate` + : Validates the provided certificate and verifies that it’s signed by a trusted authority (CA), but doesn’t check the certificate `hostname`. + + `none` + : Performs no certificate validation. + + ::::{important} + Setting certificate validation to `none` disables many security benefits of SSL/TLS, which is very dangerous. Only set this value if instructed by Elastic Support as a temporary diagnostic mechanism when attempting to resolve TLS errors. + :::: + + + :::: + + + Defaults to `full`. + + +`xpack.security.http.ssl.cipher_suites` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported cipher suites vary depending on which version of Java you use. For example, for version 12 the default value is `TLS_AES_256_GCM_SHA384`, `TLS_AES_128_GCM_SHA256`, `TLS_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA`, `TLS_RSA_WITH_AES_256_GCM_SHA384`, `TLS_RSA_WITH_AES_128_GCM_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA256`, `TLS_RSA_WITH_AES_128_CBC_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA`, `TLS_RSA_WITH_AES_128_CBC_SHA`. + + For more information, see Oracle’s [Java Cryptography Architecture documentation](https://docs.oracle.com/en/java/javase/11/security/oracle-providers.md#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2). + + +### HTTP TLS/SSL key and trusted certificate settings [security-http-tls-ssl-key-trusted-certificate-settings] + +The following settings are used to specify a private key, certificate, and the trusted certificates that should be used when communicating over an SSL/TLS connection. A private key and certificate must be configured. + + +### PEM encoded files [_pem_encoded_files_2] + +When using PEM encoded files, use the following settings: + +`xpack.security.http.ssl.key` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Path to a PEM encoded file containing the private key. + + If HTTP client authentication is required, it uses this file. 
You cannot use this setting and `ssl.keystore.path` at the same time. + + +`xpack.security.http.ssl.key_passphrase` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. [7.17.0] Prefer `ssl.secure_key_passphrase` instead. + + You cannot use this setting and `ssl.secure_key_passphrase` at the same time. + + +`xpack.security.http.ssl.secure_key_passphrase` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. + +`xpack.security.http.ssl.certificate` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path for the PEM encoded certificate (or certificate chain) that is associated with the key. + + This setting can be used only if `ssl.key` is set. + + +`xpack.security.http.ssl.certificate_authorities` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) List of paths to PEM encoded certificate files that should be trusted. + + This setting and `ssl.truststore.path` cannot be used at the same time. + + + +### Java keystore files [_java_keystore_files_2] + +When using Java keystore files (JKS), which contain the private key, certificate and certificates that should be trusted, use the following settings: + +`xpack.security.http.ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`xpack.security.http.ssl.keystore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the keystore. [7.17.0] Prefer `ssl.keystore.secure_password` instead. + +`xpack.security.http.ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + +`xpack.security.http.ssl.keystore.key_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the key in the keystore. The default is the keystore password. [7.17.0] Prefer `ssl.keystore.secure_key_password` instead. + + You cannot use this setting and `ssl.keystore.secure_password` at the same time. + + +`xpack.security.http.ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. + +`xpack.security.http.ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`xpack.security.http.ssl.truststore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the truststore. 
[7.17.0] Prefer `ssl.truststore.secure_password` instead. + + You cannot use this setting and `ssl.truststore.secure_password` at the same time. + + +`xpack.security.http.ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + +### PKCS#12 files [security-http-pkcs12-files] + +{{es}} can be configured to use PKCS#12 container files (`.p12` or `.pfx` files) that contain the private key, certificate and certificates that should be trusted. + +PKCS#12 files are configured in the same way as Java keystore files: + +`xpack.security.http.ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`xpack.security.http.ssl.keystore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the keystore file. It must be either `jks` or `PKCS12`. If the keystore path ends in ".p12", ".pfx", or ".pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. + +`xpack.security.http.ssl.keystore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the keystore. [7.17.0] Prefer `ssl.keystore.secure_password` instead. + +`xpack.security.http.ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + +`xpack.security.http.ssl.keystore.key_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the key in the keystore. The default is the keystore password. [7.17.0] Prefer `ssl.keystore.secure_key_password` instead. + + You cannot use this setting and `ssl.keystore.secure_password` at the same time. + + +`xpack.security.http.ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. + +`xpack.security.http.ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`xpack.security.http.ssl.truststore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set this to `PKCS12` to indicate that the truststore is a PKCS#12 file. + +`xpack.security.http.ssl.truststore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the truststore. [7.17.0] Prefer `ssl.truststore.secure_password` instead. + + You cannot use this setting and `ssl.truststore.secure_password` at the same time. + + +`xpack.security.http.ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + + +## Transport TLS/SSL settings [transport-tls-ssl-settings] + +You can configure the following TLS/SSL settings. 
+ +`xpack.security.transport.ssl.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Used to enable or disable TLS/SSL on the transport networking layer, which nodes use to communicate with each other. The default is `false`. + +`xpack.security.transport.ssl.supported_protocols` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported protocols with versions. Valid protocols: `SSLv2Hello`, `SSLv3`, `TLSv1`, `TLSv1.1`, `TLSv1.2`, `TLSv1.3`. If the JVM’s SSL provider supports TLSv1.3, the default is `TLSv1.3,TLSv1.2,TLSv1.1`. Otherwise, the default is `TLSv1.2,TLSv1.1`. + + {{es}} relies on your JDK’s implementation of SSL and TLS. View [Supported SSL/TLS versions by JDK version](docs-content://deploy-manage/security/supported-ssltls-versions-by-jdk-version.md) for more information. + + ::::{note} + If `xpack.security.fips_mode.enabled` is `true`, you cannot use `SSLv2Hello` or `SSLv3`. See [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md). + :::: + + +`xpack.security.transport.ssl.client_authentication` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the server’s behavior in regard to requesting a certificate from client connections. Valid values are `required`, `optional`, and `none`. `required` forces a client to present a certificate, while `optional` requests a client certificate but the client is not required to present one. Defaults to `required`. + +`xpack.security.transport.ssl.verification_mode` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Defines how to verify the certificates presented by another party in the TLS connection: + + ::::{dropdown} Valid values + `full` + : Validates that the provided certificate: has an issue date that’s within the `not_before` and `not_after` dates; chains to a trusted Certificate Authority (CA); has a `hostname` or IP address that matches the names within the certificate. + + `certificate` + : Validates the provided certificate and verifies that it’s signed by a trusted authority (CA), but doesn’t check the certificate `hostname`. + + `none` + : Performs no certificate validation. + + ::::{important} + Setting certificate validation to `none` disables many security benefits of SSL/TLS, which is very dangerous. Only set this value if instructed by Elastic Support as a temporary diagnostic mechanism when attempting to resolve TLS errors. + :::: + + + :::: + + + Defaults to `full`. + + +`xpack.security.transport.ssl.cipher_suites` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported cipher suites vary depending on which version of Java you use. 
For example, for version 12 the default value is `TLS_AES_256_GCM_SHA384`, `TLS_AES_128_GCM_SHA256`, `TLS_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA`, `TLS_RSA_WITH_AES_256_GCM_SHA384`, `TLS_RSA_WITH_AES_128_GCM_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA256`, `TLS_RSA_WITH_AES_128_CBC_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA`, `TLS_RSA_WITH_AES_128_CBC_SHA`. + + For more information, see Oracle’s [Java Cryptography Architecture documentation](https://docs.oracle.com/en/java/javase/11/security/oracle-providers.md#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2). + + +### Transport TLS/SSL key and trusted certificate settings [security-transport-tls-ssl-key-trusted-certificate-settings] + +The following settings are used to specify a private key, certificate, and the trusted certificates that should be used when communicating over an SSL/TLS connection. A private key and certificate must be configured. + + +### PEM encoded files [_pem_encoded_files_3] + +When using PEM encoded files, use the following settings: + +`xpack.security.transport.ssl.key` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Path to a PEM encoded file containing the private key. + + If HTTP client authentication is required, it uses this file. You cannot use this setting and `ssl.keystore.path` at the same time. + + +`xpack.security.transport.ssl.key_passphrase` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. [7.17.0] Prefer `ssl.secure_key_passphrase` instead. + + You cannot use this setting and `ssl.secure_key_passphrase` at the same time. + + +`xpack.security.transport.ssl.secure_key_passphrase` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. + +`xpack.security.transport.ssl.certificate` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path for the PEM encoded certificate (or certificate chain) that is associated with the key. + + This setting can be used only if `ssl.key` is set. + + +`xpack.security.transport.ssl.certificate_authorities` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) List of paths to PEM encoded certificate files that should be trusted. + + This setting and `ssl.truststore.path` cannot be used at the same time. 
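+
+Taken together, a PEM-based transport configuration might be sketched as follows; the file names are placeholders:
+
+```yaml
+xpack.security.transport.ssl.enabled: true
+xpack.security.transport.ssl.verification_mode: full
+xpack.security.transport.ssl.key: certs/node01.key
+xpack.security.transport.ssl.certificate: certs/node01.crt
+xpack.security.transport.ssl.certificate_authorities: ["certs/ca.crt"]
+```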
+ + + +### Java keystore files [_java_keystore_files_3] + +When using Java keystore files (JKS), which contain the private key, certificate and certificates that should be trusted, use the following settings: + +`xpack.security.transport.ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`xpack.security.transport.ssl.keystore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the keystore. [7.17.0] Prefer `ssl.keystore.secure_password` instead. + +`xpack.security.transport.ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + +`xpack.security.transport.ssl.keystore.key_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the key in the keystore. The default is the keystore password. [7.17.0] Prefer `ssl.keystore.secure_key_password` instead. + + You cannot use this setting and `ssl.keystore.secure_password` at the same time. + + +`xpack.security.transport.ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. + +`xpack.security.transport.ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`xpack.security.transport.ssl.truststore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the truststore. [7.17.0] Prefer `ssl.truststore.secure_password` instead. + + You cannot use this setting and `ssl.truststore.secure_password` at the same time. + + +`xpack.security.transport.ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + +### PKCS#12 files [security-transport-pkcs12-files] + +{{es}} can be configured to use PKCS#12 container files (`.p12` or `.pfx` files) that contain the private key, certificate and certificates that should be trusted. + +PKCS#12 files are configured in the same way as Java keystore files: + +`xpack.security.transport.ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`xpack.security.transport.ssl.keystore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the keystore file. It must be either `jks` or `PKCS12`. If the keystore path ends in ".p12", ".pfx", or ".pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. 
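+
+For instance, a single PKCS#12 file that holds the node’s key, certificate, and trusted CA certificates can be sketched as both keystore and truststore; the file name is a placeholder, and the corresponding secure passwords would normally be added to the {{es}} keystore rather than to `elasticsearch.yml`:
+
+```yaml
+xpack.security.transport.ssl.enabled: true
+xpack.security.transport.ssl.keystore.path: certs/elastic-certificates.p12
+xpack.security.transport.ssl.truststore.path: certs/elastic-certificates.p12
+```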
+ +`xpack.security.transport.ssl.keystore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the keystore. [7.17.0] Prefer `ssl.keystore.secure_password` instead. + +`xpack.security.transport.ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + +`xpack.security.transport.ssl.keystore.key_password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the key in the keystore. The default is the keystore password. [7.17.0] Prefer `ssl.keystore.secure_key_password` instead. + + You cannot use this setting and `ssl.keystore.secure_password` at the same time. + + +`xpack.security.transport.ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. + +`xpack.security.transport.ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`xpack.security.transport.ssl.truststore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set this to `PKCS12` to indicate that the truststore is a PKCS#12 file. + +`xpack.security.transport.ssl.truststore.password` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The password for the truststore. [7.17.0] Prefer `ssl.truststore.secure_password` instead. + + You cannot use this setting and `ssl.truststore.secure_password` at the same time. + + +`xpack.security.transport.ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + + +## Remote cluster server (API key based model) TLS/SSL settings [remote-cluster-server-tls-ssl-settings] + +You can configure the following TLS/SSL settings. + +`xpack.security.remote_cluster_server.ssl.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Used to enable or disable TLS/SSL on the remote cluster server networking layer, which {{es}} uses to communicate with remote cluster clients. The default is `true`. + +`xpack.security.remote_cluster_server.ssl.supported_protocols` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported protocols with versions. Valid protocols: `SSLv2Hello`, `SSLv3`, `TLSv1`, `TLSv1.1`, `TLSv1.2`, `TLSv1.3`. If the JVM’s SSL provider supports TLSv1.3, the default is `TLSv1.3,TLSv1.2,TLSv1.1`. Otherwise, the default is `TLSv1.2,TLSv1.1`. + + {{es}} relies on your JDK’s implementation of SSL and TLS. View [Supported SSL/TLS versions by JDK version](docs-content://deploy-manage/security/supported-ssltls-versions-by-jdk-version.md) for more information. + + ::::{note} + If `xpack.security.fips_mode.enabled` is `true`, you cannot use `SSLv2Hello` or `SSLv3`. See [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md). 
+ :::: + + +`xpack.security.remote_cluster_server.ssl.client_authentication` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Controls the server’s behavior in regard to requesting a certificate from client connections. Valid values are `required`, `optional`, and `none`. `required` forces a client to present a certificate, while `optional` requests a client certificate but the client is not required to present one. Defaults to `none`. + +`xpack.security.remote_cluster_server.ssl.verification_mode` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The SSL settings in `xpack.security.remote_cluster_server.ssl` control a *server context* for TLS, which defines the settings for the TLS connection. The use of `verification_mode` in a TLS *server* is discouraged. Defines how to verify the certificates presented by another party in the TLS connection: + + ::::{dropdown} Valid values + `full` + : Validates that the provided certificate: has an issue date that’s within the `not_before` and `not_after` dates; chains to a trusted Certificate Authority (CA); has a `hostname` or IP address that matches the names within the certificate. + + `certificate` + : Validates the provided certificate and verifies that it’s signed by a trusted authority (CA), but doesn’t check the certificate `hostname`. + + `none` + : Performs no certificate validation. + + ::::{important} + Setting certificate validation to `none` disables many security benefits of SSL/TLS, which is very dangerous. Only set this value if instructed by Elastic Support as a temporary diagnostic mechanism when attempting to resolve TLS errors. + :::: + + + :::: + + + Defaults to `full`. + + +`xpack.security.remote_cluster_server.ssl.cipher_suites` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported cipher suites vary depending on which version of Java you use. For example, for version 12 the default value is `TLS_AES_256_GCM_SHA384`, `TLS_AES_128_GCM_SHA256`, `TLS_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA`, `TLS_RSA_WITH_AES_256_GCM_SHA384`, `TLS_RSA_WITH_AES_128_GCM_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA256`, `TLS_RSA_WITH_AES_128_CBC_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA`, `TLS_RSA_WITH_AES_128_CBC_SHA`. + + For more information, see Oracle’s [Java Cryptography Architecture documentation](https://docs.oracle.com/en/java/javase/11/security/oracle-providers.md#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2). + + +### Remote cluster server (API key based model) TLS/SSL key and trusted certificate settings [security-remote-cluster-server-tls-ssl-key-trusted-certificate-settings] + +The following settings are used to specify a private key, certificate, and the trusted certificates that should be used when communicating over an SSL/TLS connection. A private key and certificate must be configured. 
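+
+As an illustrative sketch (the individual settings are described in the subsections below, and the file paths are placeholders), a PEM-based configuration for the remote cluster server interface might look like this in `elasticsearch.yml`:
+
+```yaml
+xpack.security.remote_cluster_server.ssl.key: certs/remote-cluster-key.pem
+xpack.security.remote_cluster_server.ssl.certificate: certs/remote-cluster-cert.pem
+xpack.security.remote_cluster_server.ssl.certificate_authorities: [ "certs/ca.pem" ]
+```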
+ + +### PEM encoded files [_pem_encoded_files_4] + +When using PEM encoded files, use the following settings: + +`xpack.security.remote_cluster_server.ssl.key` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Path to a PEM encoded file containing the private key. + + If HTTP client authentication is required, it uses this file. You cannot use this setting and `ssl.keystore.path` at the same time. + + +`xpack.security.remote_cluster_server.ssl.secure_key_passphrase` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. + +`xpack.security.remote_cluster_server.ssl.certificate` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path for the PEM encoded certificate (or certificate chain) that is associated with the key. + + This setting can be used only if `ssl.key` is set. + + +`xpack.security.remote_cluster_server.ssl.certificate_authorities` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) List of paths to PEM encoded certificate files that should be trusted. + + This setting and `ssl.truststore.path` cannot be used at the same time. + + + +### Java keystore files [_java_keystore_files_4] + +When using Java keystore files (JKS), which contain the private key, certificate and certificates that should be trusted, use the following settings: + +`xpack.security.remote_cluster_server.ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`xpack.security.remote_cluster_server.ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + +`xpack.security.remote_cluster_server.ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. + +`xpack.security.remote_cluster_server.ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`xpack.security.remote_cluster_server.ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + +### PKCS#12 files [security-remote-cluster-server-pkcs12-files] + +{{es}} can be configured to use PKCS#12 container files (`.p12` or `.pfx` files) that contain the private key, certificate and certificates that should be trusted. + +PKCS#12 files are configured in the same way as Java keystore files: + +`xpack.security.remote_cluster_server.ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. 
+ + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`xpack.security.remote_cluster_server.ssl.keystore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the keystore file. It must be either `jks` or `PKCS12`. If the keystore path ends in ".p12", ".pfx", or ".pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. + +`xpack.security.remote_cluster_server.ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + +`xpack.security.remote_cluster_server.ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. + +`xpack.security.remote_cluster_server.ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`xpack.security.remote_cluster_server.ssl.truststore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set this to `PKCS12` to indicate that the truststore is a PKCS#12 file. + +`xpack.security.remote_cluster_server.ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + + +## Remote cluster client (API key based model) TLS/SSL settings [remote-cluster-client-tls-ssl-settings] + +You can configure the following TLS/SSL settings. + +`xpack.security.remote_cluster_client.ssl.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Used to enable or disable TLS/SSL on the remote cluster client networking layer, which {{es}} uses to communicate with remote cluster servers. The default is `true`. + +`xpack.security.remote_cluster_client.ssl.supported_protocols` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported protocols with versions. Valid protocols: `SSLv2Hello`, `SSLv3`, `TLSv1`, `TLSv1.1`, `TLSv1.2`, `TLSv1.3`. If the JVM’s SSL provider supports TLSv1.3, the default is `TLSv1.3,TLSv1.2,TLSv1.1`. Otherwise, the default is `TLSv1.2,TLSv1.1`. + + {{es}} relies on your JDK’s implementation of SSL and TLS. View [Supported SSL/TLS versions by JDK version](docs-content://deploy-manage/security/supported-ssltls-versions-by-jdk-version.md) for more information. + + ::::{note} + If `xpack.security.fips_mode.enabled` is `true`, you cannot use `SSLv2Hello` or `SSLv3`. See [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md). 
+ :::: + + +`xpack.security.remote_cluster_client.ssl.verification_mode` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Defines how to verify the certificates presented by another party in the TLS connection: + + ::::{dropdown} Valid values + `full` + : Validates that the provided certificate: has an issue date that’s within the `not_before` and `not_after` dates; chains to a trusted Certificate Authority (CA); has a `hostname` or IP address that matches the names within the certificate. + + `certificate` + : Validates the provided certificate and verifies that it’s signed by a trusted authority (CA), but doesn’t check the certificate `hostname`. + + `none` + : Performs no certificate validation. + + ::::{important} + Setting certificate validation to `none` disables many security benefits of SSL/TLS, which is very dangerous. Only set this value if instructed by Elastic Support as a temporary diagnostic mechanism when attempting to resolve TLS errors. + :::: + + + :::: + + + Defaults to `full`. + + +`xpack.security.remote_cluster_client.ssl.cipher_suites` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported cipher suites vary depending on which version of Java you use. For example, for version 12 the default value is `TLS_AES_256_GCM_SHA384`, `TLS_AES_128_GCM_SHA256`, `TLS_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA`, `TLS_RSA_WITH_AES_256_GCM_SHA384`, `TLS_RSA_WITH_AES_128_GCM_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA256`, `TLS_RSA_WITH_AES_128_CBC_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA`, `TLS_RSA_WITH_AES_128_CBC_SHA`. + + For more information, see Oracle’s [Java Cryptography Architecture documentation](https://docs.oracle.com/en/java/javase/11/security/oracle-providers.md#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2). + + +### Remote cluster client (API key based model) TLS/SSL key and trusted certificate settings [security-remote-cluster-client-tls-ssl-key-trusted-certificate-settings] + +The following settings are used to specify a private key, certificate, and the trusted certificates that should be used when communicating over an SSL/TLS connection. A private key and certificate are optional and would be used if the server requires client authentication for PKI authentication. + + +### PEM encoded files [_pem_encoded_files_5] + +When using PEM encoded files, use the following settings: + +`xpack.security.remote_cluster_client.ssl.key` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Path to a PEM encoded file containing the private key. + + If HTTP client authentication is required, it uses this file. You cannot use this setting and `ssl.keystore.path` at the same time. 
+ + +`xpack.security.remote_cluster_client.ssl.secure_key_passphrase` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The passphrase that is used to decrypt the private key. Since the key might not be encrypted, this value is optional. + +`xpack.security.remote_cluster_client.ssl.certificate` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path for the PEM encoded certificate (or certificate chain) that is associated with the key. + + This setting can be used only if `ssl.key` is set. + + +`xpack.security.remote_cluster_client.ssl.certificate_authorities` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) List of paths to PEM encoded certificate files that should be trusted. + + This setting and `ssl.truststore.path` cannot be used at the same time. + + + +### Java keystore files [_java_keystore_files_5] + +When using Java keystore files (JKS), which contain the private key, certificate and certificates that should be trusted, use the following settings: + +`xpack.security.remote_cluster_client.ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`xpack.security.remote_cluster_client.ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + +`xpack.security.remote_cluster_client.ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. + +`xpack.security.remote_cluster_client.ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`xpack.security.remote_cluster_client.ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + +### PKCS#12 files [security-remote-cluster-client-pkcs12-files] + +{{es}} can be configured to use PKCS#12 container files (`.p12` or `.pfx` files) that contain the private key, certificate and certificates that should be trusted. + +PKCS#12 files are configured in the same way as Java keystore files: + +`xpack.security.remote_cluster_client.ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`xpack.security.remote_cluster_client.ssl.keystore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the keystore file. It must be either `jks` or `PKCS12`. If the keystore path ends in ".p12", ".pfx", or ".pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. 
+ +`xpack.security.remote_cluster_client.ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + +`xpack.security.remote_cluster_client.ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. + +`xpack.security.remote_cluster_client.ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`xpack.security.remote_cluster_client.ssl.truststore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set this to `PKCS12` to indicate that the truststore is a PKCS#12 file. + +`xpack.security.remote_cluster_client.ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + +#### Transport profile TLS/SSL settings [ssl-tls-profile-settings] + +The same settings that are available for the [default transport](#transport-tls-ssl-settings) are also available for each transport profile. By default, the settings for a transport profile are the same as for the default transport unless they are explicitly specified. + +As an example, let’s look at the key setting. For the default transport this is `xpack.security.transport.ssl.key`. To use this setting in a transport profile, use the prefix `transport.profiles.$PROFILE.xpack.security.` and append the portion of the setting after `xpack.security.transport.`. For the key setting, this would be `transport.profiles.$PROFILE.xpack.security.ssl.key`. + + +## IP filtering settings [ip-filtering-settings] + +You can configure the following settings for [IP filtering](docs-content://deploy-manage/security/ip-traffic-filtering.md). + +`xpack.security.transport.filter.allow` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) List of IP addresses to allow. + +`xpack.security.transport.filter.deny` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) List of IP addresses to deny. + +`xpack.security.http.filter.allow` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) List of IP addresses to allow just for HTTP. + +`xpack.security.http.filter.deny` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) List of IP addresses to deny just for HTTP. + +`transport.profiles.$PROFILE.xpack.security.filter.allow` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) List of IP addresses to allow for this profile. + +`transport.profiles.$PROFILE.xpack.security.filter.deny` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) List of IP addresses to deny for this profile.
+ +`xpack.security.remote_cluster.filter.allow` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) List of IP addresses to allow just for the [remote cluster server configured with the API key based model](docs-content://deploy-manage/remote-clusters/remote-clusters-api-key.md). + +`xpack.security.remote_cluster.filter.deny` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) List of IP addresses to deny just for the remote cluster server configured with the [API key based model](docs-content://deploy-manage/remote-clusters/remote-clusters-api-key.md). + + +## User cache and password hash algorithms [hashing-settings] + +Certain realms store user credentials in memory. To limit exposure to credential theft and mitigate credential compromise, the cache only stores a hashed version of the user credentials in memory. By default, the user cache is hashed with a salted `sha-256` hash algorithm. You can use a different hashing algorithm by setting the [static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting) `cache.hash_algo` realm settings to any of the following values: + +$$$cache-hash-algo$$$ + +| | | | | +| --- | --- | --- | --- | +| Algorithm | | | Description | +| `ssha256` | | | Uses a salted `sha-256` algorithm (default). | +| `md5` | | | Uses `MD5` algorithm. | +| `sha1` | | | Uses `SHA1` algorithm. | +| `bcrypt` | | | Uses `bcrypt` algorithm with salt generated in 1024 rounds. | +| `bcrypt4` | | | Uses `bcrypt` algorithm with salt generated in 16 rounds. | +| `bcrypt5` | | | Uses `bcrypt` algorithm with salt generated in 32 rounds. | +| `bcrypt6` | | | Uses `bcrypt` algorithm with salt generated in 64 rounds. | +| `bcrypt7` | | | Uses `bcrypt` algorithm with salt generated in 128 rounds. | +| `bcrypt8` | | | Uses `bcrypt` algorithm with salt generated in 256 rounds. | +| `bcrypt9` | | | Uses `bcrypt` algorithm with salt generated in 512 rounds. | +| `pbkdf2` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 10000 iterations. | +| `pbkdf2_1000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 1000 iterations. | +| `pbkdf2_10000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 10000 iterations. | +| `pbkdf2_50000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 50000 iterations. | +| `pbkdf2_100000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 100000 iterations. | +| `pbkdf2_500000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 500000 iterations. | +| `pbkdf2_1000000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 1000000 iterations. | +| `pbkdf2_stretch` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 10000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_1000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 1000 iterations, after hashing the initial input with SHA512 first. 
| +| `pbkdf2_stretch_10000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 10000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_50000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 50000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_100000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 100000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_500000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 500000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_1000000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 1000000 iterations, after hashing the initial input with SHA512 first. | +| `noop`,`clear_text` | | | Doesn’t hash the credentials and keeps it in clear text in memory. CAUTION: keeping clear text is considered insecure and can be compromised at the OS level (for example through memory dumps and using `ptrace`). | + +Likewise, realms that store passwords hash them using cryptographically strong and password-specific salt values. You can configure the algorithm for password hashing by setting the [static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting) `xpack.security.authc.password_hashing.algorithm` setting to one of the following: + +$$$password-hashing-algorithms$$$ + +| Algorithm | | | Description | +| --- | --- | --- | --- | +| `bcrypt` | | | Uses `bcrypt` algorithm with salt generated in 1024 rounds. (default) | +| `bcrypt4` | | | Uses `bcrypt` algorithm with salt generated in 16 rounds. | +| `bcrypt5` | | | Uses `bcrypt` algorithm with salt generated in 32 rounds. | +| `bcrypt6` | | | Uses `bcrypt` algorithm with salt generated in 64 rounds. | +| `bcrypt7` | | | Uses `bcrypt` algorithm with salt generated in 128 rounds. | +| `bcrypt8` | | | Uses `bcrypt` algorithm with salt generated in 256 rounds. | +| `bcrypt9` | | | Uses `bcrypt` algorithm with salt generated in 512 rounds. | +| `bcrypt10` | | | Uses `bcrypt` algorithm with salt generated in 1024 rounds. | +| `bcrypt11` | | | Uses `bcrypt` algorithm with salt generated in 2048 rounds. | +| `bcrypt12` | | | Uses `bcrypt` algorithm with salt generated in 4096 rounds. | +| `bcrypt13` | | | Uses `bcrypt` algorithm with salt generated in 8192 rounds. | +| `bcrypt14` | | | Uses `bcrypt` algorithm with salt generated in 16384 rounds. | +| `pbkdf2` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 10000 iterations. | +| `pbkdf2_1000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 1000 iterations. | +| `pbkdf2_10000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 10000 iterations. | +| `pbkdf2_50000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 50000 iterations. | +| `pbkdf2_100000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 100000 iterations. | +| `pbkdf2_500000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 500000 iterations. 
| +| `pbkdf2_1000000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 1000000 iterations. | +| `pbkdf2_stretch` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 10000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_1000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 1000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_10000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 10000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_50000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 50000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_100000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 100000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_500000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 500000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_1000000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 1000000 iterations, after hashing the initial input with SHA512 first. | + +Furthermore, {{es}} supports authentication via securely-generated high entropy tokens, for instance [API keys](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-security-create-api-key). Analogous to passwords, only the tokens' hashes are stored. Since the tokens are guaranteed to have sufficiently high entropy to resist offline attacks, secure salted hash functions are supported in addition to the password-hashing algorithms mentioned above. + +You can configure the algorithm for API key stored credential hashing by setting the [static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting) `xpack.security.authc.api_key.hashing.algorithm` setting to one of the following + +$$$secure-token-hashing-algorithms$$$ + +| Algorithm | | | Description | +| --- | --- | --- | --- | +| `ssha256` | | | Uses a salted `sha-256` algorithm. (default) | +| `bcrypt` | | | Uses `bcrypt` algorithm with salt generated in 1024 rounds. | +| `bcrypt4` | | | Uses `bcrypt` algorithm with salt generated in 16 rounds. | +| `bcrypt5` | | | Uses `bcrypt` algorithm with salt generated in 32 rounds. | +| `bcrypt6` | | | Uses `bcrypt` algorithm with salt generated in 64 rounds. | +| `bcrypt7` | | | Uses `bcrypt` algorithm with salt generated in 128 rounds. | +| `bcrypt8` | | | Uses `bcrypt` algorithm with salt generated in 256 rounds. | +| `bcrypt9` | | | Uses `bcrypt` algorithm with salt generated in 512 rounds. | +| `bcrypt10` | | | Uses `bcrypt` algorithm with salt generated in 1024 rounds. | +| `bcrypt11` | | | Uses `bcrypt` algorithm with salt generated in 2048 rounds. | +| `bcrypt12` | | | Uses `bcrypt` algorithm with salt generated in 4096 rounds. | +| `bcrypt13` | | | Uses `bcrypt` algorithm with salt generated in 8192 rounds. | +| `bcrypt14` | | | Uses `bcrypt` algorithm with salt generated in 16384 rounds. | +| `pbkdf2` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 10000 iterations. 
| +| `pbkdf2_1000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 1000 iterations. | +| `pbkdf2_10000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 10000 iterations. | +| `pbkdf2_50000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 50000 iterations. | +| `pbkdf2_100000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 100000 iterations. | +| `pbkdf2_500000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 500000 iterations. | +| `pbkdf2_1000000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 1000000 iterations. | +| `pbkdf2_stretch` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 10000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_1000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 1000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_10000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 10000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_50000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 50000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_100000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 100000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_500000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 500000 iterations, after hashing the initial input with SHA512 first. | +| `pbkdf2_stretch_1000000` | | | Uses `PBKDF2` key derivation function with `HMAC-SHA512` as a pseudorandom function using 1000000 iterations, after hashing the initial input with SHA512 first. | + + + diff --git a/docs/reference/elasticsearch/configuration-reference/shard-request-cache-settings.md b/docs/reference/elasticsearch/configuration-reference/shard-request-cache-settings.md new file mode 100644 index 0000000000000..2e0663980604d --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/shard-request-cache-settings.md @@ -0,0 +1,16 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/shard-request-cache-settings.html +navigation_title: Shard request cache +--- +# Shard request cache settings [shard-request-cache-settings] + +The following settings affect the behavior of the [shard request cache](/reference/elasticsearch/rest-apis/shard-request-cache.md). + +## Cache settings [_cache_settings] + +`indices.requests.cache.size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum size of the cache, as a percentage of the heap. Default: `1%`. + +`indices.requests.cache.expire` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The TTL for cached results. Stale results are automatically invalidated when the index is refreshed, so you shouldn’t need to use this setting. 
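+
+For example, both settings could be set in `elasticsearch.yml` as follows (the values shown are purely illustrative, not recommendations):
+
+```yaml
+indices.requests.cache.size: 2%
+indices.requests.cache.expire: 1h
+```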
diff --git a/docs/reference/elasticsearch/configuration-reference/snapshot-restore-settings.md b/docs/reference/elasticsearch/configuration-reference/snapshot-restore-settings.md new file mode 100644 index 0000000000000..5d1cc998b580c --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/snapshot-restore-settings.md @@ -0,0 +1,47 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/snapshot-settings.html +--- + +# Snapshot and restore settings [snapshot-settings] + +The following cluster settings configure [snapshot and restore](docs-content://deploy-manage/tools/snapshot-and-restore.md). + +$$$snapshot-max-concurrent-ops$$$ + +`snapshot.max_concurrent_operations` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), integer) Maximum number of concurrent snapshot operations. Defaults to `1000`. + + This limit applies in total to all ongoing snapshot creation, cloning, and deletion operations. {{es}} will reject any operations that would exceed this limit. + + +## {{slm-init}} settings [_slm_init_settings] + +The following cluster settings configure [{{slm}} ({{slm-init}})](docs-content://deploy-manage/tools/snapshot-and-restore/create-snapshots.md#automate-snapshots-slm). + +$$$slm-history-index-enabled$$$ + +`slm.history_index_enabled` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), Boolean) Controls whether {{slm-init}} records the history of actions taken as part of {{slm-init}} policies to the `slm-history-*` indices. Defaults to `true`. + +$$$slm-retention-schedule$$$ + +`slm.retention_schedule` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), [cron scheduler value](docs-content://explore-analyze/alerts-cases/watcher/schedule-types.md#schedule-cron)) Controls when the [retention task](docs-content://deploy-manage/tools/snapshot-and-restore/create-snapshots.md#slm-retention-task) runs. Can be a periodic or absolute time schedule. Supports all values supported by the [cron scheduler](docs-content://explore-analyze/alerts-cases/watcher/schedule-types.md#schedule-cron). Defaults to daily at 1:30am UTC: `0 30 1 * * ?`. + +$$$slm-retention-duration$$$ + +`slm.retention_duration` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), [time value](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) Limits how long {{slm-init}} should spend deleting old snapshots. Defaults to one hour: `1h`. + +$$$slm-health-failed-snapshot-warn-threshold$$$ + +`slm.health.failed_snapshot_warn_threshold` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting), Long) The number of failed invocations since last successful snapshot that indicate a problem with the policy in the health api. Defaults to a health api warning after five repeated failures: `5L`. + +$$$repositories-url-allowed$$$ + +`repositories.url.allowed_urls` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the [read-only URL repositories](docs-content://deploy-manage/tools/snapshot-and-restore/read-only-url-repository.md) that snapshots can be restored from. 
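+
+As a sketch only, several of the settings above could be set together in `elasticsearch.yml` (or, because they are dynamic, via the cluster update settings API); the values below simply restate the documented defaults:
+
+```yaml
+snapshot.max_concurrent_operations: 1000
+slm.retention_schedule: "0 30 1 * * ?"
+slm.retention_duration: "1h"
+```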
+ + diff --git a/docs/reference/elasticsearch/configuration-reference/thread-pool-settings.md b/docs/reference/elasticsearch/configuration-reference/thread-pool-settings.md new file mode 100644 index 0000000000000..2e2844dc22d1a --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/thread-pool-settings.md @@ -0,0 +1,139 @@ +--- +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-threadpool.html +--- + +# Thread pool settings [modules-threadpool] + +A node uses several thread pools to manage memory consumption. Queues associated with many of the thread pools enable pending requests to be held instead of discarded. + +There are several thread pools, but the important ones include: + +`generic` +: For generic operations (for example, background node discovery). Thread pool type is `scaling`. + +$$$search-threadpool$$$ + +`search` +: For count/search operations at the shard level. Also used by fetch and other search-related operations. Thread pool type is `fixed` with a size of `int((`[`# of allocated processors`](#node.processors)` * 3) / 2) + 1`, and queue_size of `1000`. + +$$$search-throttled$$$`search_throttled` +: For count/search/suggest/get operations on `search_throttled` indices. Thread pool type is `fixed` with a size of `1`, and queue_size of `100`. + +`search_coordination` +: For lightweight search-related coordination operations. Thread pool type is `fixed` with a size of `(`[`# of allocated processors`](#node.processors)`) / 2`, and queue_size of `1000`. + +`get` +: For get operations. Thread pool type is `fixed` with a size of `int((`[`# of allocated processors`](#node.processors)` * 3) / 2) + 1`, and queue_size of `1000`. + +`analyze` +: For analyze requests. Thread pool type is `fixed` with a size of `1`, queue size of `16`. + +`write` +: For single-document index/delete/update, ingest processors, and bulk requests. Thread pool type is `fixed` with a size of [`# of allocated processors`](#node.processors), queue_size of `10000`. The maximum size for this pool is `1 + `[`# of allocated processors`](#node.processors). + +`snapshot` +: For snapshot/restore operations. Thread pool type is `scaling` with a keep-alive of `5m`. On nodes with at least 750MB of heap the maximum size of this pool is `10` by default. On nodes with less than 750MB of heap the maximum size of this pool is `min(5, (`[`# of allocated processors`](#node.processors)`) / 2)` by default. + +`snapshot_meta` +: For snapshot repository metadata read operations. Thread pool type is `scaling` with a keep-alive of `5m` and a max of `min(50, (`[`# of allocated processors`](#node.processors)`* 3))`. + +`warmer` +: For segment warm-up operations. Thread pool type is `scaling` with a keep-alive of `5m` and a max of `min(5, (`[`# of allocated processors`](#node.processors)`) / 2)`. + +`refresh` +: For refresh operations. Thread pool type is `scaling` with a keep-alive of `5m` and a max of `min(10, (`[`# of allocated processors`](#node.processors)`) / 2)`. + +`fetch_shard_started` +: For listing shard states. Thread pool type is `scaling` with keep-alive of `5m` and a default maximum size of `2 * `[`# of allocated processors`](#node.processors). + +`fetch_shard_store` +: For listing shard stores. Thread pool type is `scaling` with keep-alive of `5m` and a default maximum size of `2 * `[`# of allocated processors`](#node.processors).
+ +`flush` +: For [flush](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-flush) and [translog](/reference/elasticsearch/index-settings/translog.md) `fsync` operations. Thread pool type is `scaling` with a keep-alive of `5m` and a default maximum size of `min(5, (`[`# of allocated processors`](#node.processors)`) / 2)`. + +`force_merge` +: For [force merge](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-forcemerge) operations. Thread pool type is `fixed` with a size of `max(1, (`[`# of allocated processors`](#node.processors)`) / 8)` and an unbounded queue size. + +`management` +: For cluster management. Thread pool type is `scaling` with a keep-alive of `5m` and a default maximum size of `5`. + +`system_read` +: For read operations on system indices. Thread pool type is `fixed` with a default maximum size of `min(5, (`[`# of allocated processors`](#node.processors)`) / 2)`. + +`system_write` +: For write operations on system indices. Thread pool type is `fixed` with a default maximum size of `min(5, (`[`# of allocated processors`](#node.processors)`) / 2)`. + +`system_critical_read` +: For critical read operations on system indices. Thread pool type is `fixed` with a default maximum size of `min(5, (`[`# of allocated processors`](#node.processors)`) / 2)`. + +`system_critical_write` +: For critical write operations on system indices. Thread pool type is `fixed` with a default maximum size of `min(5, (`[`# of allocated processors`](#node.processors)`) / 2)`. + +`watcher` +: For [watch executions](docs-content://explore-analyze/alerts-cases/watcher.md). Thread pool type is `fixed` with a default maximum size of `min(5 * (`[`# of allocated processors`](#node.processors)`), 50)` and queue_size of `1000`. + +$$$modules-threadpool-esql$$$`esql_worker` +: Executes [{{esql}}](docs-content://explore-analyze/query-filter/languages/esql.md) operations. Thread pool type is `fixed` with a size of `int((`[`# of allocated processors`](#node.processors) ` * 3) / 2) + 1`, and queue_size of `1000`. + +Thread pool settings are [static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting) and can be changed by editing `elasticsearch.yml`. Changing a specific thread pool can be done by setting its type-specific parameters; for example, changing the number of threads in the `write` thread pool: + +```yaml +thread_pool: + write: + size: 30 +``` + +## Thread pool types [thread-pool-types] + +The following are the types of thread pools and their respective parameters: + +### `fixed` [fixed-thread-pool] + +The `fixed` thread pool holds a fixed number of threads to handle requests, with a queue (optionally bounded) for pending requests that have no threads to service them. + +The `size` parameter controls the number of threads. + +The `queue_size` parameter allows you to control the size of the queue of pending requests that have no threads to execute them. By default, it is set to `-1`, which means it is unbounded. When a request comes in and the queue is full, the request is aborted. + +```yaml +thread_pool: + write: + size: 30 + queue_size: 1000 +``` + + +### `scaling` [scaling-thread-pool] + +The `scaling` thread pool holds a dynamic number of threads. This number is proportional to the workload and varies between the value of the `core` and `max` parameters. + +The `keep_alive` parameter determines how long a thread is kept in the thread pool without doing any work.
+ +```yaml +thread_pool: + warmer: + core: 1 + max: 8 + keep_alive: 2m +``` + + + +## Allocated processors setting [node.processors] + +The number of processors is automatically detected, and the thread pool settings are automatically set based on it. In some cases it can be useful to override the number of detected processors. This can be done by explicitly setting the `node.processors` setting. This setting is bounded by the number of available processors and accepts floating point numbers, which can be useful in environments where the {{es}} nodes are configured to run with CPU limits, such as cpu shares or quota under `Cgroups`. + +```yaml +node.processors: 2 +``` + +There are a few use cases for explicitly overriding the `node.processors` setting: + +1. If you are running multiple instances of {{es}} on the same host but want {{es}} to size its thread pools as if it only has a fraction of the CPU, you should override the `node.processors` setting to the desired fraction. For example, if you’re running two instances of {{es}} on a 16-core machine, set `node.processors` to 8. Note that this is an expert-level use case and there’s a lot more involved than just setting the `node.processors` setting as there are other considerations like changing the number of garbage collector threads, pinning processes to cores, and so on. +2. Sometimes the number of processors is wrongly detected; in such cases, explicitly setting the `node.processors` setting will work around such issues. + +To check the number of processors detected, use the nodes info API with the `os` flag. + + diff --git a/docs/reference/elasticsearch/configuration-reference/transforms-settings.md b/docs/reference/elasticsearch/configuration-reference/transforms-settings.md new file mode 100644 index 0000000000000..1408aba965307 --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/transforms-settings.md @@ -0,0 +1,30 @@ +--- +navigation_title: "{{transforms-cap}} settings" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/transform-settings.html +--- + +# {{transforms-cap}} settings in Elasticsearch [transform-settings] + + +You do not need to configure any settings to use {{transforms}}. They are enabled by default. + + +## General {{transforms}} settings [general-transform-settings] + +`node.roles: [ transform ]` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set `node.roles` to contain `transform` to identify the node as a *transform node*. If you want to run {{transforms}}, there must be at least one {{transform}} node in your cluster. + + If you set `node.roles`, you must explicitly specify all the required roles for the node. To learn more, refer to [Node settings](/reference/elasticsearch/configuration-reference/node-settings.md). + + ::::{important} + It is strongly recommended that dedicated {{transform}} nodes also have the `remote_cluster_client` role; otherwise, {{ccs}} fails when used in {{transforms}}. See [Remote-eligible node](docs-content://deploy-manage/distributed-architecture/clusters-nodes-shards/node-roles.md#remote-node). + :::: + + +`xpack.transform.enabled` +: [7.8.0] ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) This deprecated setting no longer has any effect.
+ +`xpack.transform.num_transform_failure_retries` +: ([Dynamic](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings)) The number of times that a {{transform}} retries when it experiences a non-fatal error. Once the number of retries is exhausted, the {{transform}} task is marked as `failed`. The default value is `10` with a valid minimum of `0` and maximum of `100`. If a {{transform}} is already running, it has to be restarted to use the changed setting. The `num_failure_retries` setting can also be specified on an individual {{transform}} level. Specifying this setting for each {{transform}} individually is recommended. + diff --git a/docs/reference/elasticsearch/configuration-reference/watcher-settings.md b/docs/reference/elasticsearch/configuration-reference/watcher-settings.md new file mode 100644 index 0000000000000..3f1aad0d276cd --- /dev/null +++ b/docs/reference/elasticsearch/configuration-reference/watcher-settings.md @@ -0,0 +1,565 @@ +--- +navigation_title: "{{watcher}} settings" +mapped_pages: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/notification-settings.html +--- + +# {{watcher}} settings in Elasticsearch [notification-settings] + + +$$$notification-settings-description$$$ +You configure {{watcher}} settings to set up {{watcher}} and send notifications via [email](#email-notification-settings), [Slack](#slack-notification-settings), and [PagerDuty](#pagerduty-notification-settings). + +All of these settings can be added to the `elasticsearch.yml` configuration file, with the exception of the secure settings, which you add to the {{es}} keystore. For more information about creating and updating the {{es}} keystore, see [Secure settings](docs-content://deploy-manage/security/secure-settings.md). Dynamic settings can also be updated across a cluster with the [cluster update settings API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings). + +## General Watcher Settings [general-notification-settings] + +`xpack.watcher.enabled` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set to `false` to disable {{watcher}} on the node. + +$$$xpack-watcher-encrypt-sensitive-data$$$ + +`xpack.watcher.encrypt_sensitive_data` ![logo cloud](https://doc-icons.s3.us-east-2.amazonaws.com/logo_cloud.svg "Supported on {{ess}}") +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set to `true` to encrypt sensitive data. If this setting is enabled, you must also specify the `xpack.watcher.encryption_key` setting. For more information, see [*Encrypting sensitive data in {{watcher}}*](docs-content://explore-analyze/alerts-cases/watcher/encrypting-data.md). + +`xpack.watcher.encryption_key` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Specifies the path to a file that contains a key for encrypting sensitive data. If `xpack.watcher.encrypt_sensitive_data` is set to `true`, this setting is required. For more information, see [*Encrypting sensitive data in {{watcher}}*](docs-content://explore-analyze/alerts-cases/watcher/encrypting-data.md). + +`xpack.watcher.max.history.record.size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum size watcher history record that can be written into the watcher history index. 
Any larger history record will have some of its larger fields removed. Defaults to 10mb. + +`xpack.http.proxy.host` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the address of the proxy server to use to connect to HTTP services. + +`xpack.http.proxy.port` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the port number to use to connect to the proxy server. + +`xpack.http.proxy.scheme` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Protocol used to communicate with the proxy server. Valid values are `http` and `https`. Defaults to the protocol used in the request. + +`xpack.http.default_connection_timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum period to wait for a connection to be established before the request is aborted. + +`xpack.http.default_read_timeout` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The maximum period of inactivity between two data packets before the request is aborted. + +`xpack.http.tcp.keep_alive` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Whether to enable TCP keepalives on HTTP connections. Defaults to `true`. + +`xpack.http.connection_pool_ttl` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The time-to-live of connections in the connection pool. If a connection is not re-used within this timeout, it is closed. By default, the time-to-live is infinite, meaning that connections never expire. + +`xpack.http.max_response_size` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the maximum size an HTTP response is allowed to have. Defaults to `10mb`; the maximum configurable value is `50mb`. + +`xpack.http.whitelist` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A list of URLs that the internal HTTP client is allowed to connect to. This client is used in the HTTP input and in the webhook, Slack, PagerDuty, and Jira actions. This setting can be updated dynamically. It defaults to `*`, allowing everything. Note: If you configure this setting and you are using one of the Slack or PagerDuty actions, you have to ensure that the corresponding endpoints are explicitly allowed as well. + + +## {{watcher}} HTTP TLS/SSL settings [ssl-notification-settings] + +You can configure the following TLS/SSL settings. + +`xpack.http.ssl.supported_protocols` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported protocols with versions. Valid protocols: `SSLv2Hello`, `SSLv3`, `TLSv1`, `TLSv1.1`, `TLSv1.2`, `TLSv1.3`. If the JVM’s SSL provider supports TLSv1.3, the default is `TLSv1.3,TLSv1.2,TLSv1.1`. Otherwise, the default is `TLSv1.2,TLSv1.1`. + + {{es}} relies on your JDK’s implementation of SSL and TLS. View [Supported SSL/TLS versions by JDK version](docs-content://deploy-manage/security/supported-ssltls-versions-by-jdk-version.md) for more information. + + ::::{note} + If `xpack.security.fips_mode.enabled` is `true`, you cannot use `SSLv2Hello` or `SSLv3`.
See [FIPS 140-2](docs-content://deploy-manage/security/fips-140-2.md). + :::: + + +`xpack.http.ssl.verification_mode` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Defines how to verify the certificates presented by another party in the TLS connection: + + ::::{dropdown} Valid values + `full` + : Validates that the provided certificate: has an issue date that’s within the `not_before` and `not_after` dates; chains to a trusted Certificate Authority (CA); has a `hostname` or IP address that matches the names within the certificate. + + `certificate` + : Validates the provided certificate and verifies that it’s signed by a trusted authority (CA), but doesn’t check the certificate `hostname`. + + `none` + : Performs no certificate validation. + + ::::{important} + Setting certificate validation to `none` disables many security benefits of SSL/TLS, which is very dangerous. Only set this value if instructed by Elastic Support as a temporary diagnostic mechanism when attempting to resolve TLS errors. + :::: + + + :::: + + + Defaults to `full`. + + +`xpack.http.ssl.cipher_suites` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Supported cipher suites vary depending on which version of Java you use. For example, for version 12 the default value is `TLS_AES_256_GCM_SHA384`, `TLS_AES_128_GCM_SHA256`, `TLS_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256`, `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256`, `TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA`, `TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA`, `TLS_RSA_WITH_AES_256_GCM_SHA384`, `TLS_RSA_WITH_AES_128_GCM_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA256`, `TLS_RSA_WITH_AES_128_CBC_SHA256`, `TLS_RSA_WITH_AES_256_CBC_SHA`, `TLS_RSA_WITH_AES_128_CBC_SHA`. + + For more information, see Oracle’s [Java Cryptography Architecture documentation](https://docs.oracle.com/en/java/javase/11/security/oracle-providers.md#GUID-7093246A-31A3-4304-AC5F-5FB6400405E2). + + +### {{watcher}} HTTP TLS/SSL key and trusted certificate settings [watcher-tls-ssl-key-trusted-certificate-settings] + +The following settings are used to specify a private key, certificate, and the trusted certificates that should be used when communicating over an SSL/TLS connection. A private key and certificate are optional and would be used if the server requires client authentication for PKI authentication. + + +### PEM encoded files [_pem_encoded_files_6] + +When using PEM encoded files, use the following settings: + +`xpack.http.ssl.key` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Path to a PEM encoded file containing the private key. + + If HTTP client authentication is required, it uses this file. You cannot use this setting and `ssl.keystore.path` at the same time. + + +`xpack.http.ssl.secure_key_passphrase` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The passphrase that is used to decrypt the private key. 
Since the key might not be encrypted, this value is optional. + +`xpack.http.ssl.certificate` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Specifies the path for the PEM encoded certificate (or certificate chain) that is associated with the key. + + This setting can be used only if `ssl.key` is set. + + +`xpack.http.ssl.certificate_authorities` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) List of paths to PEM encoded certificate files that should be trusted. + + This setting and `ssl.truststore.path` cannot be used at the same time. + + + +### Java keystore files [_java_keystore_files_6] + +When using Java keystore files (JKS), which contain the private key, certificate and certificates that should be trusted, use the following settings: + +`xpack.http.ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`xpack.http.ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + +`xpack.http.ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. + +`xpack.http.ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`xpack.http.ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + +### PKCS#12 files [watcher-pkcs12-files] + +{{es}} can be configured to use PKCS#12 container files (`.p12` or `.pfx` files) that contain the private key, certificate and certificates that should be trusted. + +PKCS#12 files are configured in the same way as Java keystore files: + +`xpack.http.ssl.keystore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore file that contains a private key and certificate. + + It must be either a Java keystore (jks) or a PKCS#12 file. You cannot use this setting and `ssl.key` at the same time. + + +`xpack.http.ssl.keystore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The format of the keystore file. It must be either `jks` or `PKCS12`. If the keystore path ends in ".p12", ".pfx", or ".pkcs12", this setting defaults to `PKCS12`. Otherwise, it defaults to `jks`. + +`xpack.http.ssl.keystore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the keystore. + +`xpack.http.ssl.keystore.secure_key_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) The password for the key in the keystore. The default is the keystore password. 
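+
+For illustration only, a minimal PKCS#12-based configuration for the {{watcher}} HTTP client might look like the following sketch in `elasticsearch.yml`. The file name is a placeholder, and the corresponding `xpack.http.ssl.keystore.secure_password` and `xpack.http.ssl.keystore.secure_key_password` values are added to the {{es}} keystore (for example with `bin/elasticsearch-keystore add`) rather than to this file:
+
+```yaml
+# Example values only; adjust the path to your environment.
+xpack.http.ssl.verification_mode: full          # the default, shown for completeness
+xpack.http.ssl.keystore.path: "http-client.p12"
+xpack.http.ssl.keystore.type: "PKCS12"          # optional; inferred from the .p12 extension
+```
+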
+ +`xpack.http.ssl.truststore.path` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) The path for the keystore that contains the certificates to trust. It must be either a Java keystore (jks) or a PKCS#12 file. + + You cannot use this setting and `ssl.certificate_authorities` at the same time. + + +`xpack.http.ssl.truststore.type` +: ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Set this to `PKCS12` to indicate that the truststore is a PKCS#12 file. + +`xpack.http.ssl.truststore.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md)) Password for the truststore. + + + +## Email Notification Settings [email-notification-settings] + +You can configure the following email notification settings in `elasticsearch.yml`. For more information about sending notifications via email, see [Configuring email actions](docs-content://explore-analyze/alerts-cases/watcher/actions-email.md#configuring-email-actions). + +`xpack.notification.email.default_account` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Default email account to use. + + If you configure multiple email accounts, you must either configure this setting or specify the email account to use in the [`email`](docs-content://explore-analyze/alerts-cases/watcher/actions-email.md) action. See [Configuring email accounts](docs-content://explore-analyze/alerts-cases/watcher/actions-email.md#configuring-email). + + +`xpack.notification.email.recipient_allowlist` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Specifies addresses to which emails are allowed to be sent. Emails with recipients (`To:`, `Cc:`, or `Bcc:`) outside of these patterns will be rejected and an error thrown. This setting defaults to `["*"]` which means all recipients are allowed. Simple globbing is supported, such as `list-*@company.com` in the list of allowed recipients. + +::::{note} +This setting can’t be used at the same time as `xpack.notification.email.account.domain_allowlist` and an error will be thrown if both are set at the same time. This setting can be used to specify domains to allow by using a wildcard pattern such as `*@company.com`. +:::: + + +`xpack.notification.email.account` +: Specifies account information for sending notifications via email. You can specify the following email account attributes: + +`xpack.notification.email.account.domain_allowlist` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Specifies domains to which emails are allowed to be sent. Emails with recipients (`To:`, `Cc:`, or `Bcc:`) outside of these domains will be rejected and an error thrown. This setting defaults to `["*"]` which means all domains are allowed. Simple globbing is supported, such as `*.company.com` in the list of allowed domains. + +::::{note} +This setting can’t be used at the same time as `xpack.notification.email.recipient_allowlist` and an error will be thrown if both are set at the same time. 
+:::: + + +$$$email-account-attributes$$$ + +`profile` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The [email profile](docs-content://explore-analyze/alerts-cases/watcher/actions-email.md#configuring-email) to use to build the MIME messages that are sent from the account. Valid values: `standard`, `gmail` and `outlook`. Defaults to `standard`. + +`email_defaults.*` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) An optional set of email attributes to use as defaults for the emails sent from the account. See [Email action attributes](docs-content://explore-analyze/alerts-cases/watcher/actions-email.md#email-action-attributes) for the supported attributes. + +`smtp.auth` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Set to `true` to attempt to authenticate the user using the AUTH command. Defaults to `false`. + +`smtp.host` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The SMTP server to connect to. Required. + +`smtp.port` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The SMTP server port to connect to. Defaults to 25. + +`smtp.user` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The user name for SMTP. Required. + +`smtp.secure_password` +: ([Secure](docs-content://deploy-manage/security/secure-settings.md), [reloadable](docs-content://deploy-manage/security/secure-settings.md#reloadable-secure-settings)) The password for the specified SMTP user. + +`smtp.starttls.enable` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Set to `true` to enable the use of the `STARTTLS` command (if supported by the server) to switch the connection to a TLS-protected connection before issuing any login commands. Note that an appropriate trust store must be configured so that the client will trust the server’s certificate. Defaults to `false`. + +`smtp.starttls.required` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) If `true`, then `STARTTLS` will be required. If that command fails, the connection will fail. Defaults to `false`. + +`smtp.ssl.trust` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A list of SMTP server hosts that are assumed trusted and for which certificate verification is disabled. If set to "*", all hosts are trusted. If set to a whitespace separated list of hosts, those hosts are trusted. Otherwise, trust depends on the certificate the server presents. + +`smtp.timeout` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The socket read timeout. Default is two minutes. + +`smtp.connection_timeout` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The socket connection timeout. Default is two minutes. + +`smtp.write_timeout` +: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) The socket write timeout. Default is two minutes. 
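+
+The per-account `smtp.*` attributes are nested under an account name in `elasticsearch.yml`. As a sketch only (the account name `work`, the addresses, and the host are placeholders), a single Gmail-style account could look like this, with the password stored in the {{es}} keystore as `xpack.notification.email.account.work.smtp.secure_password`:
+
+```yaml
+xpack.notification.email.account:
+  work:
+    profile: gmail
+    email_defaults:
+      from: "Watcher <watcher@example.com>"
+    smtp:
+      auth: true
+      starttls.enable: true
+      host: smtp.gmail.com
+      port: 587
+      user: watcher@example.com
+```
+
+Watches can then reference this account by name in their `email` actions, or use it implicitly if it is the only configured account or is set as `xpack.notification.email.default_account`.
+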
+
+`smtp.local_address`
+: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A configurable local address when sending emails. Not configured by default.
+
+`smtp.local_port`
+: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) A configurable local port when sending emails. Not configured by default.
+
+`smtp.send_partial`
+: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) Send the email even if one of the recipient addresses is invalid.
+
+`smtp.wait_on_quit`
+: ([Dynamic](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#dynamic-cluster-setting)) If set to `false`, the QUIT command is sent and the connection is closed. If set to `true`, the QUIT command is sent and a reply is waited for. Defaults to `true`.
+
+`xpack.notification.email.html.sanitization.allow`
+: Specifies the HTML elements that are allowed in email notifications. For more information, see [Configuring HTML sanitization options](docs-content://explore-analyze/alerts-cases/watcher/actions-email.md#email-html-sanitization). You can specify individual HTML elements and the following HTML feature groups:
+
+    $$$html-feature-groups$$$
+
+    `_tables`
+    : ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) All table related elements, such as `<table>`, `<tr>`, `<th>`, and `<td>`.
+
+    `_blocks`
+    : ([Static](docs-content://deploy-manage/deploy/self-managed/configure-elasticsearch.md#static-cluster-setting)) Block elements such as `<p>`, `<div>`, `<h1>` through `<h6>`, `<ul>`, `<ol>`, `<li>`, and `<blockquote>`.